# Introduction into Pandas

In [1]:
import pandas as pd

In [2]:
students = ['Alice', 'Jack', 'Mark']
pd.Series(students)

0    Alice
1     Jack
2     Mark
dtype: object

In [3]:
numbers = [1, 2, 3]
pd.Series(numbers)

0    1
1    2
2    3
dtype: int64

In [4]:
students = ['Alice', 'Jack', None]
pd.Series(students)

0    Alice
1     Jack
2     None
dtype: object

In [5]:
numbers = [1, 2, None]
pd.Series(numbers)

0    1.0
1    2.0
2    NaN
dtype: float64

In [6]:
import numpy as np

In [7]:
np.nan

nan

In [8]:
np.nan == None

False

In [9]:
np.nan == np.nan

False

In [10]:
np.isnan(np.nan)

True

In [11]:
students_scores = {'Alice': 4, 'Jack': 5}
s = pd.Series(students_scores)
s

Alice    4
Jack     5
dtype: int64

In [12]:
s.index

Index(['Alice', 'Jack'], dtype='object')

In [13]:
studs = [('J', 'Jack'), ('M', 'Green')]
s = pd.Series(studs)
s

0     (J, Jack)
1    (M, Green)
dtype: object

In [14]:
s = pd.Series(['Jojo','Mock','Clasher'], index=[1,2,3])
s

1       Jojo
2       Mock
3    Clasher
dtype: object

## Querying a Series

In [15]:
stu_classes = {'Alice': 'Physics', 
               'Jack': 'Chemistry', 
               'Molly': 'Math'}
s = pd.Series(stu_classes)
s

Alice      Physics
Jack     Chemistry
Molly         Math
dtype: object

In [16]:
s.iloc

<pandas.core.indexing._iLocIndexer at 0x7f1f00c592c8>

In [17]:
s.iloc[0]

'Physics'

In [18]:
s.iloc[2]

'Math'

In [19]:
s.loc['Molly']

'Math'

In [20]:
s[1]

'Chemistry'

In [21]:
s['Jack']

'Chemistry'

**Note:**
If an index is provided as a list of integers, queying s[0] will call s.loc[0] instead of s.iloc[0]!

In [22]:
grades = pd.Series([5,4,4,5,3])
grades

0    5
1    4
2    4
3    5
4    3
dtype: int64

In [23]:
grades.mean()

4.2

In [24]:
grades.max()

5

In [25]:
np.sum(grades)

21

In [26]:
numbers = pd.Series(np.random.randint(0,1000,100))
numbers

0     860
1     465
2     180
3     609
4     526
     ... 
95    377
96    117
97    153
98    499
99    931
Length: 100, dtype: int64

In [27]:
numbers.head()

0    860
1    465
2    180
3    609
4    526
dtype: int64

In [28]:
len(numbers)

100

In [29]:
numbers.shape

(100,)

In [30]:
s = pd.Series(list(range(5)))
s

0    0
1    1
2    2
3    3
4    4
dtype: int64

In [31]:
s.loc['History'] = 102
s

0            0
1            1
2            2
3            3
4            4
History    102
dtype: int64

In [32]:
s.iloc[4] = 400
s

0            0
1            1
2            2
3            3
4          400
History    102
dtype: int64

In [33]:
stu = {'Kelly':'Math','Sam':'History'}
s = pd.Series(stu)
s

Kelly       Math
Sam      History
dtype: object

In [34]:
k = pd.Series(['Chemistry', 'Phylosophy'], index=['Kelly','Kelly'])
k

Kelly     Chemistry
Kelly    Phylosophy
dtype: object

In [35]:
k['Kelly']

Kelly     Chemistry
Kelly    Phylosophy
dtype: object

In [36]:
s.append(k)

Kelly          Math
Sam         History
Kelly     Chemistry
Kelly    Phylosophy
dtype: object

# Dataframe

In [37]:
rec1 = pd.Series({'name':'Alice','score':5})
rec2 = pd.Series({'name':'John','score':3})
rec3 = pd.Series({'name':'Helen','score':4})

In [38]:
students = ['student1','student2','student3']
df = pd.DataFrame([rec1,rec2,rec3], index=students)

In [39]:
df.head()

Unnamed: 0,name,score
student1,Alice,5
student2,John,3
student3,Helen,4


In [40]:
df.loc['student1']

name     Alice
score        5
Name: student1, dtype: object

In [41]:
df.loc['student2','name']

'John'

In [42]:
df.T

Unnamed: 0,student1,student2,student3
name,Alice,John,Helen
score,5,3,4


In [43]:
df.T.loc['score']

student1    5
student2    3
student3    4
Name: score, dtype: object

In [44]:
df['score']

student1    5
student2    3
student3    4
Name: score, dtype: int64

In [45]:
df.loc[:,'name']

student1    Alice
student2     John
student3    Helen
Name: name, dtype: object

In [46]:
df.drop('student2')

Unnamed: 0,name,score
student1,Alice,5
student3,Helen,4


In [47]:
df.drop('name',axis=1)

Unnamed: 0,score
student1,5
student2,3
student3,4


## csv

In [48]:
df.columns

Index(['name', 'score'], dtype='object')

In [49]:
df = df.rename(columns={'score':' points  '})
df

Unnamed: 0,name,points
student1,Alice,5
student2,John,3
student3,Helen,4


In [50]:
df.columns

Index(['name', ' points  '], dtype='object')

In [51]:
df = df.rename(mapper=str.strip,axis='columns')
df

Unnamed: 0,name,points
student1,Alice,5
student2,John,3
student3,Helen,4


In [52]:
df.columns

Index(['name', 'points'], dtype='object')

In [53]:
list(df.columns)

['name', 'points']

In [54]:
df[df['points']>=4]

Unnamed: 0,name,points
student1,Alice,5
student3,Helen,4


In [55]:
df.where(df['points']>=4)

Unnamed: 0,name,points
student1,Alice,5.0
student2,,
student3,Helen,4.0


In [56]:
df.where(df['points']>=4).dropna()

Unnamed: 0,name,points
student1,Alice,5.0
student3,Helen,4.0


In [57]:
(df['points']<4) | (df['points']>4)

student1     True
student2     True
student3    False
Name: points, dtype: bool

In [58]:
df['points'].gt(4) | df['points'].lt(4)

student1     True
student2     True
student3    False
Name: points, dtype: bool

In [59]:
df['points'].gt(5).lt(3)

student1    True
student2    True
student3    True
Name: points, dtype: bool

In [60]:
df

Unnamed: 0,name,points
student1,Alice,5
student2,John,3
student3,Helen,4


In [61]:
df['stu'] = df.index
df

Unnamed: 0,name,points,stu
student1,Alice,5,student1
student2,John,3,student2
student3,Helen,4,student3


In [62]:
df = df.set_index('name')
df

Unnamed: 0_level_0,points,stu
name,Unnamed: 1_level_1,Unnamed: 2_level_1
Alice,5,student1
John,3,student2
Helen,4,student3


In [63]:
df = df.reset_index()
df

Unnamed: 0,name,points,stu
0,Alice,5,student1
1,John,3,student2
2,Helen,4,student3


In [64]:
df=df.drop(['level_0','index'],axis=1)

KeyError: "['level_0' 'index'] not found in axis"

In [65]:
df

Unnamed: 0,name,points,stu
0,Alice,5,student1
1,John,3,student2
2,Helen,4,student3


In [66]:
df.columns = [x.upper() for x in df.columns]
df

Unnamed: 0,NAME,POINTS,STU
0,Alice,5,student1
1,John,3,student2
2,Helen,4,student3


In [67]:
df.iloc(1)

<pandas.core.indexing._iLocIndexer at 0x7f1f00c09b38>

In [68]:
df.where(df['POINTS']!=4).isnull()

Unnamed: 0,NAME,POINTS,STU
0,False,False,False
1,False,False,False
2,True,True,True


In [69]:
df.where(df['POINTS']!=4).dropna()

Unnamed: 0,NAME,POINTS,STU
0,Alice,5.0,student1
1,John,3.0,student2


In [70]:
df.where(df['POINTS']!=4).fillna(0)

Unnamed: 0,NAME,POINTS,STU
0,Alice,5.0,student1
1,John,3.0,student2
2,0,0.0,0


In [71]:
df.where(df['POINTS']!=4).fillna(0).replace(0, 1)

Unnamed: 0,NAME,POINTS,STU
0,Alice,5.0,student1
1,John,3.0,student2
2,1,1.0,1


In [72]:
df.replace([3,4],[30,40])

Unnamed: 0,NAME,POINTS,STU
0,Alice,5,student1
1,John,30,student2
2,Helen,40,student3


In [76]:
df['NAME'].isin(['Alice','Helen'])

0     True
1    False
2     True
Name: NAME, dtype: bool

In [77]:
df.iloc[1]

NAME          John
POINTS           3
STU       student2
Name: 1, dtype: object

In [81]:
df.rename(mapper = lambda x: x.upper(), axis = 1)

Unnamed: 0,NAME,POINTS,STU
0,Alice,5,student1
1,John,3,student2
2,Helen,4,student3
