# Pandas

#### Dataframes and Series

- A DataFrame is a two-dimensional, labeled table (rows and columns).
- A Series is a one-dimensional, labeled array.

In [71]:
# importing the libraries
import numpy as np
import pandas as pd

In [72]:
# Build a Series from a plain list; the np.nan entry marks a missing value.
s = pd.Series(data=[1, 3, 5, np.nan, 6, 8])

In [73]:
# Display the Series: a default 0..5 integer index, and float64 dtype because
# of the NaN entry.
s

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

In [74]:
# Six consecutive calendar days starting 2013-01-01; used as the row index of
# the DataFrame built a few cells below.
dates = pd.date_range(start='20130101', periods=6, freq='D')

In [75]:
# Display the DatetimeIndex (six daily timestamps).
dates

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [76]:
# Build a 6x4 DataFrame of standard-normal samples, indexed by `dates`.
# The column labels are written as a list literal rather than list('ABCD') so
# this cell keeps working even when the name `list` has been rebound in the
# kernel namespace; with the old form, re-running the cell in that state raised
# "TypeError: 'list' object is not callable".
df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=['A', 'B', 'C', 'D'])

In [None]:
# Display the 6x4 frame. The values differ between runs because no random seed
# is set before this randn call.
df

In [None]:
# First row by integer position; the result is a Series indexed by the column
# labels.
df.iloc[0]

In [None]:
# The same first row, but head(1) keeps it as a one-row DataFrame.
df.head(1)

In [None]:
# Column 'A' via attribute access. This is equivalent to df['A'], but only
# works when the label is a valid Python identifier that does not clash with an
# existing DataFrame attribute or method.
df.A

In [None]:
labels = ['W', 'X', 'Y', 'Z']
list = [10, 20, 30, 40]
array = np.array([10, 20, 30, 40])
dict = {'w': 10, 'x': 30, 'y': 20, 'z': 40}

In [None]:
pd.Series(data=list)

In [None]:
pd.Series(data=list, index=labels)

In [None]:
# creating a series based on a dictionary
pd.Series(dict)

In [None]:
# Dictionary whose values have mixed types: a scalar, a list, a tuple and a
# nested dict.
a = {
    'w': 10,
    'x': [30, 45, 89],
    'y': ('ab', 'cd', 'ef'),
    'z': {'a': 56, 'b': 89},
}

In [None]:
# Each dict key becomes an index label and each value is stored whole as one
# element, so the mixed value types give an object-dtype Series.
pd.Series(a)

#### indexing a series

In [None]:
# First example Series: integer values labelled by sport name.
sports1 = pd.Series(
    data=[1, 2, 3, 4],
    index=['Cricket', 'Football', 'Basketball', 'Golf'],
)

In [None]:
# Second example Series: shares 'Cricket', 'Basketball' and 'Golf' with
# sports1, but has 'Hockey' where sports1 has 'Football'.
sports2 = pd.Series(
    data=[1, 2, 5, 4],
    index=['Cricket', 'Hockey', 'Basketball', 'Golf'],
)

In [None]:
# Display the first Series.
sports1

In [None]:
# Display the second Series.
sports2

In [None]:
# Label-based lookup: returns the value stored under 'Cricket' (1).
sports1['Cricket']

##### Operations between Series are aligned on the index labels

In [None]:
# Addition matches values by index label, not by position. 'Football' and
# 'Hockey' each exist in only one of the two Series, so their results are NaN,
# and the result dtype becomes float.
sports1 + sports2

In [None]:
# Seed NumPy's global RNG so the random scores are identical on every run.
np.random.seed(100)
row_labels = 'A B C D E F G H I J'.split()
score_columns = 'Score1 Score2 Score3 Score4 Score5'.split()
# 10 rows (A..J) x 5 score columns of standard-normal samples.
dataframe = pd.DataFrame(
    np.random.randn(10, 5),
    index=row_labels,
    columns=score_columns,
)

In [None]:
# Display the full 10x5 scores frame.
dataframe

In [None]:
# A single column name in brackets returns that column as a Series.
dataframe['Score1']

In [None]:
# A list of column names (note the double brackets) returns a DataFrame
# containing just those columns.
dataframe[['Score1', 'Score2']]

In [None]:
# Confirms that selecting with a list of names yields a DataFrame, not a Series.
type(dataframe[['Score1', 'Score2']])

#### Adding a new column to the Dataframe

In [None]:
# Assigning to a new key adds a column to `dataframe` in place; here it is the
# row-by-row sum of Score1 and Score2.
dataframe['Score6'] = dataframe['Score1'] + dataframe['Score2']

In [None]:
# Display the frame to confirm the new Score6 column is present.
dataframe

#### Removing the columns from dataframe

In [None]:
# axis=1 targets columns. Without inplace=True, drop() returns a new DataFrame
# without 'Score6' and leaves `dataframe` itself unchanged.
dataframe.drop('Score6', axis=1)

In [None]:
# Score6 is still here: the previous drop() only returned a modified copy.
dataframe

In [None]:
# inplace=True removes the column from `dataframe` itself and returns None.
# NOTE: this cell is not re-runnable; once 'Score6' is gone, a second run
# raises KeyError.
dataframe.drop('Score6', axis=1, inplace=True)

In [None]:
# Score6 is now gone from the original frame.
dataframe

#### Dropping rows

In [None]:
# axis=0 targets rows: returns a new frame without the row labelled 'A'.
# `dataframe` itself is not modified because inplace is not set.
dataframe.drop('A', axis=0)

#### Accessing Columns

In [None]:
# Bracket access by column label; works for any column name.
dataframe['Score1']

In [None]:
# Attribute access to the same column. Only possible when the label is a valid
# Python identifier and does not clash with a DataFrame attribute or method.
dataframe.Score1

#### Accessing the rows

In [None]:
# .loc selects by index label: the row labelled 'C', returned as a Series.
dataframe.loc['C']

In [None]:
# .iloc selects by integer position: position 2 is the third row, which is the
# same row 'C' given the A..J index.
dataframe.iloc[2]

#### Selecting the subset of rows and columns

In [None]:
# Row label and column label together select a single scalar value.
dataframe.loc['C', 'Score1']

In [None]:
# A list of rows with a single column label returns a Series (rows C and D of
# Score1).
dataframe.loc[['C', 'D'], 'Score1']

In [None]:
# Wrapping the column in a list as well keeps the result a DataFrame
# (2 rows x 1 column).
dataframe.loc[['C', 'D'], ['Score1']]

In [None]:
# Lists for both axes select a rectangular sub-frame (2 rows x 2 columns).
dataframe.loc[['C', 'D'], ['Score1', 'Score2']]

#### Conditional Selection

In [None]:
# Display the current frame as the reference for the comparisons below.
dataframe

In [None]:
# Element-wise comparison: a same-shaped frame of True/False values.
dataframe > 0.5

In [None]:
# Comparison on a single row (integer position 6, i.e. the seventh row): a
# boolean Series with one entry per column.
dataframe.iloc[6] > 0.3

In [None]:
# Indexing with a boolean frame keeps the shape: values where the mask is True
# are kept, everything else becomes NaN.
dataframe[dataframe>0.5]

In [None]:
# Indexing with a boolean Series filters rows: only the rows whose Score1
# exceeds 0.5 are kept, with all columns.
dataframe[ dataframe['Score1'] > 0.5 ]

In [None]:
# Score2 values for the rows whose Score1 exceeds 0.5.
# A single .loc[row_mask, column] lookup replaces the chained
# dataframe[mask]['Score2'] form. The result is the same, but it is one
# indexing step and the pattern stays correct if it is later reused for
# assignment, where chained indexing can silently write to a temporary copy.
dataframe.loc[dataframe['Score1'] > 0.5, 'Score2']

In [None]:
# Display the frame before changing its index.
dataframe

In [None]:
# Returns a new frame with a default 0..n-1 index; the old labels are moved
# into a regular column named 'index'. `dataframe` itself is not modified.
dataframe.reset_index()

In [None]:
# Ten replacement row labels, one for each row of the scores frame.
country_codes = 'IND JP CAN GE IT PL FY IU RT IP'
newindex = country_codes.split()

In [None]:
# Display the list of ten labels.
newindex

In [None]:
# Add the labels as a new 'Countries' column; the list length (10) must match
# the number of rows.
dataframe['Countries'] = newindex

In [None]:
# Display the frame with the new 'Countries' column.
dataframe

In [None]:
# Promote 'Countries' to the row index. inplace=True mutates `dataframe`
# directly, and the previous index labels are discarded (not kept as a column).
# NOTE: this cell is not re-runnable; once 'Countries' is the index it is no
# longer a column, so a second run raises KeyError.
dataframe.set_index('Countries', inplace=True)

In [None]:
# Display the frame, now indexed by country code.
dataframe

#### Missing values

In [None]:
# Small frame with deliberate gaps (NaN) for the missing-value examples.
# Note: this rebinds `dataframe`, replacing the scores frame used so far.
dataframe = pd.DataFrame(
    {
        'Cricket': [1, 2, np.nan, 7, 8, np.nan],
        'Baseball': [1, 2, 45, 7, 89, 56],
        'Hockey': [np.nan, 2, np.nan, 7, 8, 56],
    }
)

In [None]:
# Display the frame; NaN marks the missing entries.
dataframe

In [None]:
# Default dropna(): drop every row that contains at least one NaN. Only the
# rows at index 1, 3 and 4 are complete. Returns a new frame.
dataframe.dropna()

In [None]:
# axis=1: drop every column that contains at least one NaN. Only 'Baseball'
# has no gaps, so it is the only column kept.
dataframe.dropna(axis=1)

In [None]:
# The original frame is unchanged: the dropna() calls above returned copies.
dataframe

In [None]:
# thresh=N keeps only rows with at least N non-NaN values. Every row has a
# 'Baseball' value, so with thresh=1 no row is dropped.
dataframe.dropna(thresh=1)

In [None]:
# Require at least 2 non-NaN values per row: only the row at index 2 (where
# just 'Baseball' is present) is dropped.
dataframe.dropna(thresh=2)

In [None]:
# Replace every NaN in the frame with 0; returns a new frame and leaves
# `dataframe` unchanged.
dataframe.fillna(0)

In [None]:
# Fill the gaps in 'Cricket' with the column mean. mean() skips NaN, so this is
# the mean of the observed values 1, 2, 7, 8 (= 4.5). Returns a new Series;
# `dataframe` is not modified.
dataframe['Cricket'].fillna(value=dataframe['Cricket'].mean())