# Pandas

## Series

### A Series is a one-dimensional, labeled array

In [1]:
import pandas as pd

# A Series built from a plain list gets a default integer index (0, 1, 2, ...).
grades = pd.Series(data=[87, 100, 94])
grades

0     87
1    100
2     94
dtype: int64

In [2]:
# Summary statistics, one per output line:
# count, mean, minimum, maximum and standard deviation.
print(
    grades.count(),
    grades.mean(),
    grades.min(),
    grades.max(),
    grades.std(),
    sep='\n',
)


3
93.66666666666667
87
100
6.506407098647712


In [3]:
# describe() bundles count, mean, std, min, the quartiles and max into one Series.
print(grades.describe())

count      3.000000
mean      93.666667
std        6.506407
min       87.000000
25%       90.500000
50%       94.000000
75%       97.000000
max      100.000000
dtype: float64


In [4]:
# Passing index= replaces the default integer labels with custom ones.
grades = pd.Series(data=[87, 100, 94], index=['Wally', 'Eva', 'Sam'])
grades

Wally     87
Eva      100
Sam       94
dtype: int64

In [5]:
# Dictionary initializer: the keys become the index labels,
# the values become the data.
grades = pd.Series({
    'Wally': 87,
    'Eva': 100,
    'Sam': 94,
})
grades

Wally     87
Eva      100
Sam       94
dtype: int64

In [11]:
# Look up a single element by its index label:
# first with square brackets, then with attribute-style access.
print(grades['Eva'])
print(grades.Wally)

100
87


In [10]:
# .values exposes the underlying data as an array, without the index labels.
grades.values

array([ 87, 100,  94])

In [12]:
# Series of strings.
# The .str accessor applies a string method to every element,
# i.e. hardware.str.<string method>(...) works element-wise.
hardware = pd.Series(data=['Hammer', 'Saw', 'Wrench'])
hardware.str.contains('a')

0     True
1     True
2    False
dtype: bool

In [15]:
# Upper-case every element through the .str accessor; the result is a Series of strings.
hardware.str.upper()

0    HAMMER
1       SAW
2    WRENCH
dtype: object

## DataFrames

In [19]:
import pandas as pd

# Each key becomes a column name; each list supplies that column's values,
# one per row.
grades_dict = {
    'Wally': [87, 96, 70],
    'Eva': [100, 87, 90],
    'Sam': [94, 77, 90],
    'Katie': [100, 81, 82],
    'Bob': [83, 65, 85],
}
grades = pd.DataFrame(data=grades_dict)
print(grades)

   Wally  Eva  Sam  Katie  Bob
0     87  100   94    100   83
1     96   87   77     81   65
2     70   90   90     82   85


In [21]:
# Rebuild the frame with named rows instead of the default integer index.
grades = pd.DataFrame(data=grades_dict, index=['Test1', 'Test2', 'Test3'])
grades

Unnamed: 0,Wally,Eva,Sam,Katie,Bob
Test1,87,100,94,100,83
Test2,96,87,77,81,65
Test3,70,90,90,82,85


In [22]:
# Assigning to .index relabels the rows of the existing frame
# (here: switching to lower-case labels).
grades.index = ['test1','test2','test3']
grades

Unnamed: 0,Wally,Eva,Sam,Katie,Bob
test1,87,100,94,100,83
test2,96,87,77,81,65
test3,70,90,90,82,85


In [23]:
# Attribute-style access returns the 'Eva' column as a Series.
grades.Eva

test1    100
test2     87
test3     90
Name: Eva, dtype: int64

In [26]:
# .loc selects by label; a single row label returns that row as a Series.
print(grades.loc['test1'])

Wally     87
Eva      100
Sam       94
Katie    100
Bob       83
Name: test1, dtype: int64


In [36]:
# A label slice with .loc includes BOTH endpoints ('test1' through 'test2').
print(grades.loc['test1':'test2'])
# A list of labels picks exactly those rows; they need not be adjacent.
print(grades.loc[['test1','test3']])

       Wally  Eva  Sam  Katie  Bob
test1     87  100   94    100   83
test2     96   87   77     81   65
       Wally  Eva  Sam  Katie  Bob
test1     87  100   94    100   83
test3     70   90   90     82   85


In [30]:
# .iloc selects by integer position; position 0 is the first row ('test1').
print(grades.iloc[0])

Wally     87
Eva      100
Sam       94
Katie    100
Bob       83
Name: test1, dtype: int64


In [31]:
# A positional slice excludes its end: 0:2 yields rows 0 and 1 only.
print(grades.iloc[0:2])

       Wally  Eva  Sam  Katie  Bob
test1     87  100   94    100   83
test2     96   87   77     81   65


In [38]:
# Row label slice plus a list of column labels:
# rows 'test1' through 'test2', columns 'Eva' and 'Katie'.
grades.loc['test1':'test2',['Eva','Katie']]

Unnamed: 0,Eva,Katie
test1,100,100
test2,87,81


In [39]:
# Positional equivalent: rows at positions 0 and 2,
# columns at positions 0 up to (but not including) 3.
grades.iloc[[0,2], 0:3]

Unnamed: 0,Wally,Eva,Sam
test1,87,100,94
test3,70,90,90


In [40]:
# Boolean mask: cells that fail the condition become NaN,
# and the surviving values are displayed as floats.
grades[grades >= 90]

Unnamed: 0,Wally,Eva,Sam,Katie,Bob
test1,,100.0,94.0,100.0,
test2,96.0,,,,
test3,,90.0,90.0,,


In [43]:
# .at fetches a single cell by row label and column label.
print(grades.at['test2', 'Eva'])

87


In [44]:
# .iat fetches a single cell by integer row and column position.
print(grades.iat[2, 0])

70


In [46]:
# Summary statistics computed separately for each column of the frame.
print(grades.describe())

           Wally         Eva        Sam       Katie        Bob
count   3.000000    3.000000   3.000000    3.000000   3.000000
mean   84.333333   92.333333  87.000000   87.666667  77.666667
std    13.203535    6.806859   8.888194   10.692677  11.015141
min    70.000000   87.000000  77.000000   81.000000  65.000000
25%    78.500000   88.500000  83.500000   81.500000  74.000000
50%    87.000000   90.000000  90.000000   82.000000  83.000000
75%    91.500000   95.000000  92.000000   91.000000  84.000000
max    96.000000  100.000000  94.000000  100.000000  85.000000


In [48]:
# Show floats with two decimal places from here on.
# Use the fully qualified option name: the abbreviated 'precision' pattern is
# rejected with an OptionError by pandas versions in which it matches more
# than one registered option (e.g. 'styler.format.precision' as well).
pd.set_option('display.precision', 2)
print(grades.describe())

       Wally     Eva    Sam   Katie    Bob
count   3.00    3.00   3.00    3.00   3.00
mean   84.33   92.33  87.00   87.67  77.67
std    13.20    6.81   8.89   10.69  11.02
min    70.00   87.00  77.00   81.00  65.00
25%    78.50   88.50  83.50   81.50  74.00
50%    87.00   90.00  90.00   82.00  83.00
75%    91.50   95.00  92.00   91.00  84.00
max    96.00  100.00  94.00  100.00  85.00


In [50]:
# .T transposes the frame: the column labels become rows and the row labels become columns.
print(grades.T)

       test1  test2  test3
Wally     87     96     70
Eva      100     87     90
Sam       94     77     90
Katie    100     81     82
Bob       83     65     85


In [51]:
# mean() averages each column, giving one value per column label.
print(grades.mean())

Wally    84.33
Eva      92.33
Sam      87.00
Katie    87.67
Bob      77.67
dtype: float64


In [49]:
# Sort the rows by index label in descending order
# (ascending=True would sort them in ascending order).
grades.sort_index(ascending=False)

Unnamed: 0,Wally,Eva,Sam,Katie,Bob
test3,70,90,90,82,85
test2,96,87,77,81,65
test1,87,100,94,100,83


In [52]:
# axis=1 sorts by the column labels; axis=0 would sort by the row labels.
grades.sort_index(axis=1)

Unnamed: 0,Bob,Eva,Katie,Sam,Wally
test1,83,100,100,94,87
test2,65,87,81,77,96
test3,85,90,82,90,70


In [55]:
# Reorder the columns (axis=1) by their values in row 'test1', highest first.
grades.sort_values(by='test1',axis=1,ascending=False)

Unnamed: 0,Eva,Katie,Sam,Wally,Bob
test1,100,100,94,87,83
test2,87,81,77,96,65
test3,90,82,90,70,85
