In [None]:
import pandas as pd
import numpy as np

### Creating a Series
#### From a Python List

In [None]:
# Sample inputs for the Series examples: index labels, a plain list,
# and the same values as a NumPy array.
labels = list('abc')
my_list = [10, 20, 30]
arr = np.array(my_list)

In [None]:
# No index is given, so the Series gets the default integer index.
pd.Series(arr)

In [None]:
# Same values, but indexed by `labels` instead of integer positions.
pd.Series(data=arr, index=labels)

#### From a Dictionary

In [None]:
# Dictionary keys become the index labels, dictionary values the data.
d = dict(a=10, b=20, c=30)
pd.Series(d)

### Creating a DataFrame
#### From list

In [None]:
# Three lists of four consecutive integers each (1-4, 5-8, 9-12).
# list(range(...)) builds the list directly; a comprehension that only
# copies each element adds nothing.
a = list(range(1, 5))
b = list(range(5, 9))
c = list(range(9, 13))


In [None]:
# Each inner list becomes one row: rows are labelled a-c, columns w-z.
pd.DataFrame([a, b, c], index=list('abc'), columns=list('wxyz'))


In [None]:
# zip(a, b, c) groups the i-th elements of the three lists into one tuple,
# so each tuple is a row and each original list ends up as a column
# (the transpose of the previous frame).
pd.DataFrame(data=list(zip(a,b,c)),
             index='w x y z'.split(),
             columns='a b c'.split())

In [None]:
# A nested list is read row by row: three rows (a-c) of four values (w-z).
my_list = [
    [1, 2, 3, 4],
    [5, 6, 7, 8],
    [9, 10, 11, 12],
]
pd.DataFrame(data=my_list, index=list('abc'), columns=list('wxyz'))

#### From an array

In [None]:
from numpy.random import randn
# Seed NumPy's global random generator so the randn values drawn below
# are the same on every fresh run.
np.random.seed(101)

In [None]:
# 5x4 frame of standard-normal samples: rows A-E, columns W-Z.
df = pd.DataFrame(
    data=randn(5, 4),
    index=list('ABCDE'),
    columns=list('WXYZ'),
)
df

#### From a dictionary

In [None]:
# Dictionary keys become column names; each value list fills one column.
my_d = {
    'a': [1, 2, 3, 4],
    'b': [5, 6, 7, 8],
    'c': [9, 10, 11, 12],
}

pd.DataFrame(data=my_d, index=list('WXYZ'))

### Add a New Row or Column

In [None]:
# Assigning through .loc to a label that is not yet in the index appends
# a new row; the four values line up with columns W, X, Y, Z.
df.loc['new']=[1,2,3,4]
df

In [None]:
# Add a new *column* (also named 'new') holding the element-wise sum of
# columns W and Y.
df['new'] = df['W']+ df['Y']
df

#### insert

In [None]:
# Insert 'Colnew' at column position 2 (0-based). The six values match the
# six rows of df: A-E plus the 'new' row. insert modifies df in place, so
# re-running this cell fails because 'Colnew' would already exist.
df.insert(2, 'Colnew',[1,2,3,4,5,6])
df

### Deleting Data

#### Removing Columns
#### without inplace

In [None]:
# axis=1 drops a column. The result is a new frame; df itself still has 'new'.
df.drop('new',axis=1)

#### with inplace

In [None]:
# inplace=True removes the 'new' column from df itself; the call returns None.
df.drop('new', axis=1, inplace = True)
df

#### Removing rows

In [None]:
# axis=0 drops by row label. Without inplace this only returns a new frame,
# so df keeps row 'E'.
df.drop('E',axis=0)

### More Index Details

In [None]:
# Returns a new frame in which the old index is moved into a regular column
# and replaced by a default integer index; df itself is unchanged.
df.reset_index()

In [None]:
# Remove the 'Colnew' column that was inserted earlier, modifying df in place.
df.drop('Colnew',axis=1, inplace = True)

In [None]:
# Remove the 'new' row in place, leaving the five rows A-E.
df.drop('new', axis=0, inplace=True)

In [None]:
# Five labels, one for each of the five rows of df.
newind = 'CA NY WY OR CO'.split()
newind

In [None]:
# Attach the labels as an ordinary column first; the list length must equal
# the number of rows.
df['states'] = newind
df

In [None]:
# Promote the 'states' column to be the index, in place. The previous A-E
# index is replaced (not kept as a column).
df.set_index('states', inplace=True)

### Multi-Index

#### Creating Multi-index

In [None]:
# Index levels: the outer level repeats each city three times, the inner
# level numbers the entries 1-3 within each city.
outside = ['jakarta'] * 3 + ['surabaya'] * 3
inside = [1, 2, 3] * 2
# Pair the two levels position by position into (city, number) tuples.
hier_index = list(zip(outside, inside))
hier_index

In [None]:
# Turn the list of (city, number) tuples into a two-level MultiIndex.
# The name hier_index is rebound from the list to the MultiIndex.
hier_index=pd.MultiIndex.from_tuples(hier_index)
hier_index

#### Creating DataFrame with Multi-Index

In [None]:
# 6x2 frame of random integers drawn from 1..99 (the upper bound 100 is
# exclusive), one row per (city, number) pair of the MultiIndex.
df = pd.DataFrame(np.random.randint(1,100,(6,2)),
                  index=hier_index,
                  columns=['Restorant A', 'Restorant B'])
df

#### Indexing and Selecting

In [None]:
# Selecting an outer-level label returns the sub-frame of its inner rows.
df.loc['jakarta']

In [None]:
# xs (cross-section) with an outer-level key selects the same rows as
# df.loc['jakarta'].
df.xs('jakarta')

In [None]:
# Drill down in two steps: first the outer label, then inner label 1.
df.loc['jakarta'].loc[1]

In [None]:
# Select the single row at (outer='jakarta', inner=1) in one call.
# A multi-level key must be a tuple: list keys for xs were deprecated in
# pandas 1.x and are rejected by pandas 2.x.
df.xs(('jakarta', 1))

### Multi-index and index Hierarchy

#### Index name

In [None]:
# Names of the MultiIndex levels; no names were supplied when the index was
# built, so both are unset at this point.
df.index.names

In [None]:
# Name the outer and inner levels so they can be referred to by name.
df.index.names=['city','Location']
df

#### Indexing and Selecting

In [None]:
# Cross-section on the inner level: every row whose 'Location' is 1,
# across all cities.
df.xs(1, level='Location')

### Sorting
#### Sorting by a single column

In [None]:
# The multi-index frame built earlier has no 'name' or 'gender' column, so
# sorting it by those keys raises KeyError. Build a small sample frame that
# the sorting examples in this section can use.
df = pd.DataFrame({
    'name': ['Dewi', 'Andi', 'Citra', 'Budi', 'Eka'],
    'gender': ['F', 'M', 'F', 'M', 'F'],
    'age': [27, 31, 24, 35, 29],
})
# Sort ascending (the default) by 'name'; returns a new frame, df is unchanged.
df.sort_values('name')

In [None]:
# ascending=False reverses the order; still returns a new frame.
df.sort_values('name',ascending=False)

In [None]:
# Permanently save the result: inplace=True reorders df itself
# (the call returns None).
df.sort_values('name', ascending = False, inplace = True)
df

#### Sorting by more than one column

In [None]:
# Sort by 'gender' first, then by 'name' within each gender.
df.sort_values(by=['gender','name'])

In [None]:
# `ascending` takes one flag per sort key; here both keys sort descending.
df.sort_values(by=['gender','name'], ascending = [False, False])


In [None]:
# Same two-key descending sort, this time applied to df itself.
df.sort_values(
    by=['gender','name'],
    ascending=[False,False],
    inplace=True)
df

#### Sorting by index

In [None]:
# Order the rows by index label (ascending); returns a new frame.
df.sort_index()

In [None]:
# Permanently save the result: reorder df itself by index label.
df.sort_index(inplace=True)

## Missing Values

#### Create DataFrame with NaN Values

In [None]:
import numpy as np
import pandas as pd

In [None]:
# Three-row frame with gaps: one NaN in A, two in B, none in C.
df = pd.DataFrame(dict(A=[1, 2, np.nan],
                       B=[5, np.nan, np.nan],
                       C=[1, 2, 3]))
df

#### isna() method

In [None]:
# Boolean frame of the same shape: True wherever a value is missing.
df.isna()

In [None]:
# Summing the booleans column by column counts the missing values per column.
df.isna().sum()

#### dropna() method

In [None]:
# Drop every row that contains at least one NaN; only the first row is
# complete. Returns a new frame.
df.dropna()

In [None]:
# axis=1 drops columns that contain NaN instead of rows; only 'C' has none.
df.dropna(axis=1)

In [None]:
# NOTE(review): identical to the previous cell. Possibly meant to show a
# different dropna option (e.g. thresh=...) - confirm the intent.
df.dropna(axis=1)

#### fillna() method

In [None]:
# Replace every NaN with the given value; returns a new frame.
df.fillna(value='FILL VALUE')

In [None]:
# Fill the gap in column A with the column mean. mean() skips NaN, so the
# fill value is (1 + 2) / 2 = 1.5. Returns a new Series.
df['A'].fillna(value=df['A'].mean())

In [None]:
# df still has its NaN values: none of the dropna/fillna calls above used
# inplace or assigned their result back.
df

### Data Aggregating

#### Create DataFrame

In [None]:
import pandas as pd
# Raw columns for the aggregation examples: two people per company,
# each with one sales figure.
data = dict(
    Company=['GOOG', 'GOOG', 'MSFT', 'MSFT', 'FB', 'FB'],
    person=['Sam', 'Charlie', 'Amy', 'Vanessa', 'Carl', 'Sarah'],
    Sales=[200, 120, 340, 124, 243, 350],
)

In [None]:
# Build the frame from the dict: keys become the column names.
df = pd.DataFrame(data)
df

#### Groupby() method

In [None]:
# groupby on its own returns a DataFrameGroupBy object, not a frame; an
# aggregation has to be called on it to get results.
df.groupby('Company')

In [None]:
# Keep the grouped object so the aggregations below can reuse it.
by_comp = df.groupby('Company')

In [None]:
# Mean of the numeric columns per company. numeric_only=True is required
# because the frame also holds the string column 'person', which pandas 2.x
# refuses to average instead of silently dropping it.
df.groupby('Company').mean(numeric_only=True)

In [None]:
# Same aggregation through the stored groupby object. numeric_only=True
# restricts it to numeric columns; the string column 'person' cannot be
# averaged and makes pandas 2.x raise otherwise.
by_comp.mean(numeric_only=True)

#### More Aggregate Methods

In [None]:
# Standard deviation of the numeric columns per company. numeric_only=True
# skips the string column 'person', for which std is undefined (pandas 2.x
# raises without it).
by_comp.std(numeric_only=True)

In [None]:
# Per-company minimum of each column. For the string column 'person' the
# comparison is alphabetical.
by_comp.min()

In [None]:
# Per-company maximum of each column (alphabetical for the strings in 'person').
by_comp.max()

In [None]:
# Number of non-missing entries per column in each group.
by_comp.count()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) per group.
by_comp.describe()

In [None]:
# Transpose so the companies become columns and the statistics become rows.
by_comp.describe().transpose()

In [None]:
# After the transpose each company is a column, so plain column selection
# picks out the statistics of a single company.
by_comp.describe().transpose()['GOOG']

In [None]:
# Drill down to a single number: the 25% quantile of GOOG's Sales.
by_comp.describe().transpose()['GOOG'].loc['Sales'].loc['25%']

## Pandas: Merging, Joining, and Concatenating

#### Dataframes to Join

In [None]:
# Source data for the left-hand frame: join keys K0-K3 plus two payload
# columns whose values are the column letter followed by the row number.
df = {
    'key': [f'K{i}' for i in range(4)],
    'A': [f'A{i}' for i in range(4)],
    'B': [f'B{i}' for i in range(4)],
}
        
        


In [None]:
# Left-hand frame for the joins, built from the dict currently held in `df`.
left = pd.DataFrame(df)
left

In [None]:
# Source data for the right-hand frame. Its keys differ from the left
# frame's on purpose: K2 is absent and K4 is extra.
df = {
    'key': ['K0', 'K1', 'K3', 'K4'],
    'C': [f'C{i}' for i in range(4)],
    'D': [f'D{i}' for i in range(4)],
}

In [None]:
# Right-hand frame for the joins, built from the dict currently held in `df`.
right = pd.DataFrame(df)
right

#### Inner Join

In [None]:
# Inner join (the default `how`): keep only keys present in both frames,
# here K0, K1 and K3.
pd.merge(left, right, how='inner', on='key')

#### Outer Join

In [None]:
# Outer join: the union of keys from both frames (K0-K4); cells with no
# matching row on the other side become NaN.
pd.merge(left,right,how='outer', on='key')

#### Right Join

In [None]:
# Right join: keep every key of `right` (K0, K1, K3, K4); A and B are NaN
# for K4, which `left` does not contain.
pd.merge(left,right,how='right', on='key')

#### Left Join

In [None]:
# Left join: keep every key of `left` (K0-K3); C and D are NaN for K2,
# which `right` does not contain.
pd.merge(left,right,how='left',on='key')

### Concatenating
#### DataFrames to Concat

In [None]:
# Columns A-D, rows 0-3: every cell is the column letter followed by the
# row number (A0, A1, ... D3).
df1 = {col: [f'{col}{i}' for i in range(4)] for col in 'ABCD'}


In [None]:
# Replace the dict with the DataFrame built from it (the name df1 is reused).
df1 = pd.DataFrame(df1)
df1

In [None]:
# Same layout as the first block of data, continuing the numbering at 4-7.
df2 = {col: [f'{col}{i}' for i in range(4, 8)] for col in 'ABCD'}

In [None]:
# Replace the dict with the DataFrame built from it (the name df2 is reused).
df2 = pd.DataFrame(df2)
df2

In [None]:
# Same layout again, continuing the numbering at 8-11.
df3 = {col: [f'{col}{i}' for i in range(8, 12)] for col in 'ABCD'}

In [None]:
# Replace the dict with the DataFrame built from it (the name df3 is reused).
df3 = pd.DataFrame(df3)
df3

#### Concatenation

In [None]:
# Stack the three frames vertically (axis=0 is the default). Each frame
# keeps its own 0-3 row labels, so the labels repeat in the result.
pd.concat([df1, df2, df3], axis=0)

In [None]:
# axis=1 places the frames side by side, aligned on the row index; the
# column names A-D therefore appear three times.
pd.concat([df1,df2,df3],axis=1)

## Pandas Operations

### Columns Operation using Function

In [None]:
# Sample frame for the column-operation examples: two integer columns and
# one column of three-letter strings.
import pandas as pd
df = pd.DataFrame(dict(
    col1=[1, 2, 3, 4],
    col2=[444, 555, 666, 444],
    col3=['abc', 'def', 'ghi', 'xyz'],
))
df.head()

#### Applying Functions

In [None]:
def times2(x):
    """Return ``x`` multiplied by 2 (using whatever ``*`` means for ``x``)."""
    return x*2

In [None]:
# apply calls times2 on each value of the column and returns a new Series.
df['col1'].apply(times2)

In [None]:
# len of a Series is its number of rows (4 here), not the length of the
# strings it holds.
len(df['col3'])

In [None]:
# apply(len) instead measures each string individually.
df['col3'].apply(len)

### Columns Operation using lambda

#### Applying Lambda Functions

In [None]:
# The same doubling as times2, written inline as a lambda.
df['col1'].apply(lambda x:x*2)

In [None]:
# Second character (index 1) of each string.
df['col3'].apply(lambda x: x[1])

In [None]:
# Gives the same result as passing len directly to apply.
df['col3'].apply(lambda x: len(x))

#### Math Operation

In [None]:
# Element-wise sum of two columns, stored as a new column 'col4'.
df['col4'] = df['col1'] + df['col2']
df

In [None]:
# Element-wise difference; overwrites the existing 'col4'.
df['col4'] = df['col1'] - df['col2']
df

In [None]:
# Element-wise true division; overwrites 'col4' with float values.
df['col4'] = df['col1'] / df['col2']
df

In [None]:
# Element-wise product; overwrites 'col4'.
df['col4'] = df['col1'] * df['col2']
df

### Pivot Table

In [None]:
# Long-format sample data for the pivot table: A and B are the row keys,
# C supplies the future column labels, D holds the values.
data = dict(
    A=['foo'] * 3 + ['bar'] * 3,
    B=['one', 'one', 'two', 'two', 'one', 'one'],
    C=['x', 'y'] * 3,
    D=[1, 3, 2, 5, 4, 1],
)

In [None]:
# One row per (A, B, C) observation with its value D.
df = pd.DataFrame(data)
df

In [None]:
# Rows are indexed by (A, B), columns by the values of C, and the cells hold
# D aggregated with the default function (mean). Combinations that never
# occur in the data show up as NaN.
df.pivot_table(values='D',index=['A','B'],columns=['C'])

## Pandas: Data Input and Output

#### CSV

In [None]:
import pandas as pd

In [None]:
# Read the file literally named 'example' (no extension), resolved relative
# to the current working directory. The file must already exist.
df = pd.read_csv('example')
df

In [None]:
# index=False leaves the row index out of the file. Note that this writes to
# the same path that was read above, replacing that file.
df.to_csv('example',index=False)

#### Excel

In [None]:
# Read a single worksheet, selected by name.
# NOTE(review): reading .xlsx needs an Excel engine (e.g. openpyxl) in the
# environment - confirm it is installed.
pd.read_excel('Excel_Sample.xlsx', sheet_name='Sheet1')

In [None]:
# Write df to the named worksheet. This targets the same file that was read
# in the previous cell, so that file is replaced.
df.to_excel('Excel_Sample.xlsx', sheet_name='Sheet1')

#### JSON

In [None]:
# Small sample frame (name, age, occupation).
# NOTE(review): this cell sits under the JSON heading but contains no JSON
# read/write call - confirm whether a to_json / read_json example is missing.
df = pd.DataFrame({
    'nama': ['Andi','Budi','Caca'],
    'umur': [24,18,30],
    'pekerjaan': ['Front End Developer','Back-End Developer','Mobile Developer']
})
df.head()

#### HTML

In [None]:
import pandas as pd
# read_html returns a list of DataFrames, one per table found on the page,
# so `df` is a list here and df[0] is the first table.
# Needs network access; NOTE(review): confirm the URL is still served.
df  = pd.read_html('https://www.fdic.gov/bank/individual/failed/banklist.html')
df[0].head()