# Day 24

## Pandas

In [1]:
import pandas as pd

In [2]:
pd.__version__

'0.25.1'

In [3]:
s = pd.Series(["Hello", ['World']])
s

0      Hello
1    [World]
dtype: object

In [6]:
s1 = pd.Series(["Hello", ['World']], index=['a', 'b'])
s1

a      Hello
b    [World]
dtype: object

In [8]:
s1['a']

'Hello'

In [10]:
s1.keys()

Index(['a', 'b'], dtype='object')

In [12]:
s.values

array(['Hello', list(['World'])], dtype=object)

In [14]:
# Arithmetic between Series aligns on index labels: label 4 exists only in s2,
# so that position becomes NaN and the result is upcast to float64.
s1 = pd.Series(data=[1, 2, 3, 4])
s2 = pd.Series(data=[100, 200, 300, 400, 500])
s1 + s2

0    101.0
1    202.0
2    303.0
3    404.0
4      NaN
dtype: float64

In [17]:
# Constructing a Series from a dict: the keys become the index labels
d = dict(a=1, b=2, c=3, d=4)
ds = pd.Series(data=d)
ds

a    1
b    2
c    3
d    4
dtype: int64

In [18]:
# Slicing a Series by label: both endpoints ('b' and 'c') are included
ds.loc['b':'c']

b    2
c    3
dtype: int64

In [21]:
# Initializing a Series with the same value for every index label
s = pd.Series(5, index=list(range(5)))
s

0    5
1    5
2    5
3    5
4    5
dtype: int64

In [31]:
# Concatenate two Series
pd.concat([s1, s2])

0      1
1      2
2      3
3      4
0    100
1    200
2    300
3    400
4    500
dtype: int64

In [26]:
pd.concat([s1, s2], axis=1)

Unnamed: 0,0,1
0,1.0,100
1,2.0,200
2,3.0,300
3,4.0,400
4,,500


#### DataFrames in Pandas

In [54]:
# 3x3 frame whose rows are all [0, 1, 2]; the default integer row index is used
# (pass index=[...] to label the rows explicitly).
df = pd.DataFrame(data=[list(range(3))] * 3, columns=list('xyz'))
df

Unnamed: 0,x,y,z
0,0,1,2
1,0,1,2
2,0,1,2


In [58]:
# Single label-based lookup (row label 0, column 'x') instead of chained indexing
df.loc[0, 'x']

0

In [59]:
print(type(df))
print(type(df['x']))

<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.series.Series'>


In [60]:
# Creating a DataFrame from a dictionary of Series (the keys become column names)
s1 = pd.Series(list('abc'))
s2 = pd.Series(data=[97, 98, 99])
df1 = pd.DataFrame(data={'Char': s1, 'ASCII': s2})
df1

Unnamed: 0,Char,ASCII
0,a,97
1,b,98
2,c,99


In [62]:
df2 = pd.concat([df, df1], axis=1)
df2

Unnamed: 0,x,y,z,Char,ASCII
0,0,1,2,a,97
1,0,1,2,b,98
2,0,1,2,c,99


In [67]:
pd.concat([df2, s1])

Unnamed: 0,0,ASCII,Char,x,y,z
0,,97.0,a,0.0,1.0,2.0
1,,98.0,b,0.0,1.0,2.0
2,,99.0,c,0.0,1.0,2.0
0,a,,,,,
1,b,,,,,
2,c,,,,,


In [68]:
# Rebuild the character / ASCII-code frame from two Series
s1 = pd.Series(list('abc'))
s2 = pd.Series(data=[97, 98, 99])
df1 = pd.DataFrame(data={'Char': s1, 'ASCII': s2})
df1

Unnamed: 0,Char,ASCII
0,a,97
1,b,98
2,c,99


In [71]:
df3 = pd.concat([df1, df1], axis=1)
df3

Unnamed: 0,Char,ASCII,Char.1,ASCII.1
0,a,97,a,97
1,b,98,b,98
2,c,99,c,99


In [98]:
# df3 was built by concatenating df1 with itself column-wise, so it has two
# columns named 'Char'; selecting 'Char' therefore returns a DataFrame.
print(df3['Char'])
# Row label 0, column label 'Char': one value per duplicate 'Char' column.
print(df3['Char'].loc[0, 'Char'])
# Purely positional lookup (first row, first column): a single scalar.
print(df3['Char'].iloc[0, 0])

  Char Char
0    a    a
1    b    b
2    c    c
Char    a
Char    a
Name: 0, dtype: object
a


#### Display DF Elements

In [103]:
long_df = pd.concat([df, df])
long_df.head()


Unnamed: 0,x,y,z
0,0,1,2
1,0,1,2
2,0,1,2
0,0,1,2
1,0,1,2


In [104]:
long_df.tail()

Unnamed: 0,x,y,z
1,0,1,2
2,0,1,2
0,0,1,2
1,0,1,2
2,0,1,2


In [105]:
df.values

array([[0, 1, 2],
       [0, 1, 2],
       [0, 1, 2]], dtype=int64)

#### Queries in DataFrames

In [109]:
df3.columns = ['Char 1', 'ASCII 1', 'Char 2', 'ASCII 2']
df3

Unnamed: 0,Char 1,ASCII 1,Char 2,ASCII 2
0,a,97,a,97
1,b,98,b,98
2,c,99,c,99


In [111]:
# Keep only the rows whose 'ASCII 1' value is below 99
df3.loc[df3['ASCII 1'].lt(99)]

Unnamed: 0,Char 1,ASCII 1,Char 2,ASCII 2
0,a,97,a,97
1,b,98,b,98


In [114]:
# Combine two row conditions with element-wise AND (&)
df3.loc[df3['ASCII 1'].lt(99) & df3['ASCII 2'].gt(97)]

Unnamed: 0,Char 1,ASCII 1,Char 2,ASCII 2
1,b,98,b,98


### Applying any custom function on DF

In [124]:
def convert(x):
    """Return the upper-cased character for the integer code point ``x``."""
    character = chr(x)
    return character.upper()

In [125]:
df3

Unnamed: 0,Char 1,ASCII 1,Char 2,ASCII 2
0,a,97,a,97
1,b,98,b,98
2,c,99,c,99


In [150]:
conversion = df3['ASCII 2'].apply(convert)
conversion

0    A
1    B
2    C
Name: ASCII 2, dtype: object

In [127]:
# With the default axis=0 the Series is stacked below df3 as extra rows under a
# new column labelled 0, so every cell without a counterpart is filled with NaN.
# Passing axis=1 would attach `conversion` as an additional column instead.
new_df = pd.concat([df3, conversion])#, axis=1)
new_df

Unnamed: 0,0,ASCII 1,ASCII 2,Char 1,Char 2
0,,97.0,97.0,a,a
1,,98.0,98.0,b,b
2,,99.0,99.0,c,c
0,A,,,,
1,B,,,,
2,C,,,,


### Condition Check Functions for DF

In [129]:
pd.isna(new_df['ASCII 1'])

0    False
1    False
2    False
0     True
1     True
2     True
Name: ASCII 1, dtype: bool

## Saving Dataframe to Files

In [130]:
new_df.to_csv("new_dataFrame.csv")

In [131]:
# Work on iris dataset
from sklearn.datasets import load_iris

In [145]:
x = load_iris()


In [137]:
x.keys()

dict_keys(['data', 'target', 'target_names', 'DESCR', 'feature_names', 'filename'])

In [138]:
x['feature_names']

['sepal length (cm)',
 'sepal width (cm)',
 'petal length (cm)',
 'petal width (cm)']

In [144]:
iris_df = pd.DataFrame(x['data'], columns=x['feature_names'])
iris_df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [151]:
avg_petal_length = round(iris_df["petal length (cm)"].mean(), 3)
avg_petal_length

3.758

In [152]:
# Overwrite every petal length with the column mean computed above.
# Assigning a scalar broadcasts it to all rows, so no per-row apply() or
# intermediate DataFrame is needed.
iris_df['petal length (cm)'] = avg_petal_length

In [154]:
iris_df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,3.758,0.2
1,4.9,3.0,3.758,0.2
2,4.7,3.2,3.758,0.2
3,4.6,3.1,3.758,0.2
4,5.0,3.6,3.758,0.2
