# Introduction to Pandas

## Series

In [1]:
# importing the pandas library
import pandas as pd

In [2]:
# Python list of Strings
subjects = ["pyhsics", "chemistry", "biology", "mathematics"]

In [3]:
a = pd.Series(subjects)

In [4]:
a

0        pyhsics
1      chemistry
2        biology
3    mathematics
dtype: object

In [5]:
print(type(a))

<class 'pandas.core.series.Series'>


### Creating a Series object with Integers


In [6]:
numbers = [10, 20, 30, 40, 50]

In [7]:
b = pd.Series(numbers)

In [8]:
b

0    10
1    20
2    30
3    40
4    50
dtype: int64

In [9]:
print(type(b))

<class 'pandas.core.series.Series'>


### Creating a Series object with Booleans


In [10]:
# Python list of booleans
boolean_values = [True, False, True, False, False]

In [11]:
c = pd.Series(boolean_values)

In [12]:
c

0     True
1    False
2     True
3    False
4    False
dtype: bool

In [13]:
print(type(c))

<class 'pandas.core.series.Series'>


### Creating a Series from Tuple

In [14]:
# Python tuple
text = ("hello", "welcome", "to", "pandas")

In [15]:
print(type(text))

<class 'tuple'>


In [16]:
x = pd.Series(text)

In [17]:
x

0      hello
1    welcome
2         to
3     pandas
dtype: object

In [18]:
print(type(x))

<class 'pandas.core.series.Series'>


### Creating a Series from dictionary

In [19]:
text1 = {"Name": "John", "Age": 30, "City": "New York"}

In [20]:
type(text1)

dict

In [21]:
d = pd.Series(text1)

In [22]:
d

Name        John
Age           30
City    New York
dtype: object

In [23]:
d.index

Index(['Name', 'Age', 'City'], dtype='object')

In [24]:
print(type(d))

<class 'pandas.core.series.Series'>


##  Series Attributes

In [25]:
friends = ["john", "peter", "sam", "ravi", "pavan", "dainel"]

In [26]:
s = pd.Series(friends)

In [27]:
s.head(3)

0     john
1    peter
2      sam
dtype: object

In [28]:
# prints the values of series object
s.values

array(['john', 'peter', 'sam', 'ravi', 'pavan', 'dainel'], dtype=object)

In [29]:
# displays the index values
s.index

RangeIndex(start=0, stop=6, step=1)

In [30]:
# Returns the shape as a tuple; a Series is one-dimensional, so it is (6,): 6 elements and no column axis
s.shape

(6,)

## Series Methods

In [31]:
sample = pd.Series([10, 20, 40, 62, 38, 20, 40, 70, 10, 40])

In [32]:
# displays the number of non-null elements in the sample
sample.count()

10

In [33]:
# displays the minimum value
sample.min()

10

In [34]:
# displays the maximum value
sample.max()

70

In [35]:
# displays the mean(average)
sample.mean()

35.0

In [36]:
# displays the sample standard deviation (spread around the mean; use .var() for the variance)
sample.std()

20.37972849017703

In [37]:
sample.sort_values()

0    10
8    10
1    20
5    20
4    38
2    40
6    40
9    40
3    62
7    70
dtype: int64

In [38]:
sample.mean()

35.0

## DataFrames

### Create a DataFrame from a list, a dictionary, or a list of lists


In [39]:
data = [33, 66, 99, 45]

In [40]:
print(type(data))

<class 'list'>


In [41]:
df = pd.DataFrame(data)

In [42]:
df

Unnamed: 0,0
0,33
1,66
2,99
3,45


In [43]:
print(type(df))

<class 'pandas.core.frame.DataFrame'>


In [44]:
df = pd.DataFrame(data, columns=["age"])

In [45]:
df

Unnamed: 0,age
0,33
1,66
2,99
3,45


In [46]:
data1 = {
    "Students": ["John", "Peter", "Sam", "Jaine", "Suzain"],
    "Score": [70, 63, 77, 35, 90],
}

In [47]:
print(type(data1))

<class 'dict'>


In [48]:
df = pd.DataFrame(data1)

In [49]:
print(type(df))

<class 'pandas.core.frame.DataFrame'>


In [50]:
df

Unnamed: 0,Students,Score
0,John,70
1,Peter,63
2,Sam,77
3,Jaine,35
4,Suzain,90


In [51]:
students_score = [
    ["John", 80],
    ["Peter", 79],
    ["Sam", 90],
    ["Jaine", 76],
    ["Suzain", 85],
]

In [52]:
df = pd.DataFrame(students_score, columns=["Name", "score"])

In [53]:
df

Unnamed: 0,Name,score
0,John,80
1,Peter,79
2,Sam,90
3,Jaine,76
4,Suzain,85


### Creating a DataFrame object with List of Dictionaries


In [54]:
stock_details = [
    {"satyam": 128.0, "infosys": 1329, "google": 5000},
    {"satyam": 126, "infosys": 1322, "google": 5002},
]

In [55]:
print(type(stock_details))

<class 'list'>


In [56]:
df = pd.DataFrame(stock_details)

In [57]:
print(type(df))

<class 'pandas.core.frame.DataFrame'>


In [58]:
df

Unnamed: 0,google,infosys,satyam
0,5000,1329,128.0
1,5002,1322,126.0


In [59]:
# NOTE(review): "Students" has 5 entries but "Score" has only 4. Passing this
# dict to pd.DataFrame(...) is expected to raise ValueError, because all
# column arrays must have the same length — confirm against the pandas version in use.
# This cell also rebinds `data1` (first defined in an earlier cell) and the
# new value is not used by any later visible cell — presumably a leftover
# experiment or an unfinished error demonstration; TODO confirm intent.
data1 = {
    "Students": ["John", "Peter", "Sam", "Jaine", "Suzain"],
    "Score": [70, 63, 77, 35],
}

## Real-world dataset

In [60]:
import pandas as pd

In [61]:
df = pd.read_csv("datasets/movies.csv")

In [62]:
# df is the dataframe object
print(type(df))

<class 'pandas.core.frame.DataFrame'>


In [63]:
# displays the first 3 rows
df.head(3)

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance


In [64]:
# By default, displays the first 5 rows
df.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [65]:
# displays the last 3 rows
df.tail(3)

Unnamed: 0,movieId,title,genres
9739,193585,Flint (2017),Drama
9740,193587,Bungo Stray Dogs: Dead Apple (2018),Action|Animation
9741,193609,Andrew Dice Clay: Dice Rules (1991),Comedy


In [66]:
# Default displays the last 5 rows
df.tail()

Unnamed: 0,movieId,title,genres
9737,193581,Black Butler: Book of the Atlantic (2017),Action|Animation|Comedy|Fantasy
9738,193583,No Game No Life: Zero (2017),Animation|Comedy|Fantasy
9739,193585,Flint (2017),Drama
9740,193587,Bungo Stray Dogs: Dead Apple (2018),Action|Animation
9741,193609,Andrew Dice Clay: Dice Rules (1991),Comedy


In [67]:
# displays the number of rows and columns
df.shape
# Dataframe has 9742 rows and 3 columns

(9742, 3)

In [68]:
# index starts at 0 and stops before 9742 (stop is exclusive, so the last label is 9741), incrementing in steps of 1
df.index

RangeIndex(start=0, stop=9742, step=1)

In [69]:
# displays the number of non-null values in each column
df.count()

movieId    9742
title      9742
genres     9742
dtype: int64

In [70]:
# displays the column names or headers
df.columns

Index(['movieId', 'title', 'genres'], dtype='object')

In [71]:
df.columns

Index(['movieId', 'title', 'genres'], dtype='object')

In [72]:
df.dtypes

movieId     int64
title      object
genres     object
dtype: object

In [73]:
df.size

29226