## Pandas
Created by: Isman Kurniawan

### Creating a DataFrame from a dictionary

In [2]:
# importing pandas library
import pandas as pd

In [None]:
# Column name -> list of values; each key will become one DataFrame column
my_dict = dict(
    apples=[3, 2, 0, 1],
    oranges=[0, 3, 7, 2],
)

In [None]:
# Build a DataFrame from the dictionary: keys become column names and the
# lists become the column values
my_df = pd.DataFrame(my_dict)
# A bare expression on the last line of a cell is displayed as the cell output
my_df

In [None]:
# Replace the row labels with names; one label is supplied per row
my_df.index = ['Robby', 'Thomas', 'Andrew', 'David']

In [None]:
# Display the frame to confirm the new row labels
my_df

### Creating a DataFrame by importing from a CSV file

In [None]:
# Load the CSV file into a DataFrame; no index column is specified here
my_df = pd.read_csv("purchases.csv")
my_df

In [None]:
# Load the same file again, this time using the first column (position 0)
# as the row index
my_df = pd.read_csv("purchases.csv", index_col=0)
my_df

### Converting a DataFrame to a CSV file

In [None]:
# Write the DataFrame to disk as CSV.
# NOTE: this targets "purchases.csv", the same path the frame was read from,
# so the existing file is overwritten.
my_df.to_csv("purchases.csv")

### Viewing your data

In [None]:
# Load the IMDB movie dataset and use its "Title" column as the row index,
# so rows can later be looked up by movie title
movies_df = pd.read_csv("IMDB-Movie-Data.csv", index_col="Title")

In [None]:
# Preview the first rows of the frame (head() shows 5 rows by default)
movies_df.head()

In [None]:
# Preview the last rows of the frame (tail() shows 5 rows by default)
movies_df.tail()

In [None]:
# Print a summary of the frame: columns, non-null counts and dtypes
movies_df.info()

In [None]:
# Rename every column at once with short lowercase names.
# The list is applied positionally, so it must contain exactly one name per
# existing column, in the existing column order.
movies_df.columns = ['rank', 'genre', 'description', 'director', 
                     'actors', 'year', 'runtime', 'rating', 'votes', 
                     'revenue_millions', 'metascore']

In [None]:
# Preview the frame again to confirm the new column names
movies_df.head()

### Slicing, selecting and extracting

### Selecting by column name

In [None]:
# Single brackets with one column name select that one column
genre_col = movies_df['genre']
genre_col

In [None]:
# Selecting a single column by name yields a pandas Series
type(genre_col)

In [None]:
# Passing a list of column names (note the double brackets) keeps the result
# two-dimensional, even when the list holds a single name
genre_col = movies_df[['genre']]
genre_col

In [None]:
# Selecting with a list of column names yields a DataFrame
type(genre_col)

In [None]:
# Select several columns at once by listing their names
subset_movies = movies_df[['genre', 'rating']]
subset_movies.head()

### Selecting and slicing using .loc and .iloc

In [None]:
# .loc selects rows by index label; here the label is a movie title
subset = movies_df.loc['Prometheus']
subset

In [None]:
# A single label passed to .loc gives a Series (assuming the title is unique
# in the index -- a duplicated label would give a DataFrame instead)
type(subset)

In [None]:
# A list of labels (double brackets) keeps the result as a table,
# even for a single row
subset = movies_df.loc[['Prometheus']]
subset

In [None]:
# A list of labels passed to .loc gives a DataFrame
type(subset)

In [None]:
# Pick several specific rows by listing their labels
subset = movies_df.loc[['Prometheus','Split','Sing']]
subset

In [None]:
# Label slice: every row from 'Prometheus' through 'Sing'.
# Unlike ordinary Python slices, a .loc label slice includes the end label.
subset = movies_df.loc['Prometheus':'Sing']
subset

In [None]:
# Two selectors: a row label slice, then a list of column names to keep
subset = movies_df.loc['Prometheus':'Sing',['genre','actors']]
subset

In [None]:
# Both rows and columns given as label slices; each slice includes its end
# label, so columns 'director' through 'runtime' are all kept
subset = movies_df.loc['Prometheus':'Sing','director':'runtime']
subset

In [None]:
# .iloc selects by integer position (0-based); position 1 is the second row
subset = movies_df.iloc[1]
subset

In [None]:
# A single integer position passed to .iloc gives a Series
type(subset)

In [None]:
# A list of positions (double brackets) keeps the result as a table
subset = movies_df.iloc[[1]]
subset

In [None]:
# A list of positions passed to .iloc gives a DataFrame
type(subset)

In [None]:
# Positional slice: rows at positions 1, 2, 3 and 4.
# .iloc follows normal Python slicing, so the end position 5 is excluded.
subset = movies_df.iloc[1:5]
subset

In [None]:
# Positional slices for rows and columns: rows 1-4 and columns 2-3
# (the end position is excluded in both slices)
subset = movies_df.iloc[1:5,2:4]
subset

### Selecting by condition

In [None]:
# Comparing a column to a value gives a boolean Series (one True/False per row)
condition = (movies_df['director'] == "Ridley Scott")
# Indexing the frame with that boolean mask keeps only the rows marked True
subset = movies_df[condition]
subset

In [None]:
# Numeric comparison works the same way: keep rows whose rank is 20 or lower
condition = (movies_df['rank'] <= 20)
subset = movies_df[condition]
subset.head()

In [None]:
# Build one boolean mask per director
condition_1 = (movies_df['director'] == 'Christopher Nolan')
condition_2 = (movies_df['director'] == 'Ridley Scott')
# Combine the masks element-wise with | (OR): a row is kept if either is True
condition = condition_1 | condition_2
subset = movies_df[condition]
subset.head()

In [None]:
# Same "either director" filter, written with isin(): it marks each row True
# when the column value appears in the given list
director = ['Christopher Nolan', 'Ridley Scott']
subset = movies_df[movies_df['director'].isin(director)]
subset.head()

### Combining dataframes

In [None]:
# First example frame: apple and orange counts, four rows
my_dict = dict(
    apples=[3, 2, 0, 1],
    oranges=[0, 3, 7, 2],
)
my_df1 = pd.DataFrame(data=my_dict)
my_df1

In [None]:
# Second example frame: different columns (grape, strawberry), same row count
my_dict = dict(
    grape=[2, 1, 5, 2],
    strawberry=[4, 2, 4, 5],
)
my_df2 = pd.DataFrame(data=my_dict)
my_df2

In [None]:
# axis=1 joins the frames side by side: the columns of my_df2 are appended to
# those of my_df1, with rows matched on the index
my_df = pd.concat([my_df1, my_df2], axis=1)
my_df

In [None]:
# Third example frame: same columns as the first one, with new rows of counts
my_dict = dict(
    apples=[4, 2, 5, 6],
    oranges=[2, 1, 6, 7],
)
my_df3 = pd.DataFrame(data=my_dict)
my_df3

In [None]:
# axis=0 stacks the frames vertically: the rows of my_df3 are appended below
# those of my_df1. Each input carries its own 0..3 index, so ignore_index=True
# discards those labels and numbers the combined rows 0..n-1 in one step.
my_df = pd.concat([my_df1, my_df3], axis=0, ignore_index=True)
my_df

In [3]:
# Left table for the merge examples: shirt number, name and position per player
my_dict = dict(
    player_number=[1, 4, 9, 10, 23],
    player_name=[
        "Alisson Becker",
        "Virgin van Dijk",
        "Firminho",
        "Sadio Mane",
        "Xherdan Shaqiri",
    ],
    position=["goalkeeper", "defender", "forward", "forward", "forward"],
)
my_df1 = pd.DataFrame(data=my_dict)
my_df1

Unnamed: 0,player_number,player_name,position
0,1,Alisson Becker,goalkeeper
1,4,Virgin van Dijk,defender
2,9,Firminho,forward
3,10,Sadio Mane,forward
4,23,Xherdan Shaqiri,forward


In [4]:
# Right table for the merge examples: shirt number and country per player.
# It shares the "player_number" column with the left table, but only some of
# the numbers (1 and 9) appear in both.
# Values carry no surrounding whitespace, so comparisons such as
# == "Guinea" match as expected.
my_dict = {"player_number": [1, 3, 8, 9, 11],
          "nasionality": ["Brazil", "Brazil", "Guinea", "Brazil", "Egypt"]}
my_df2 = pd.DataFrame(my_dict)
my_df2

Unnamed: 0,player_number,nasionality
0,1,Brazil
1,3,Brazil
2,8,Guinea
3,9,Brazil
4,11,Egypt


In [5]:
# Inner merge on player_number: keep only the players whose number appears in
# BOTH tables (here 1 and 9), combining the columns of the two frames
new_df = pd.merge(my_df1, my_df2, on='player_number', how="inner")
new_df

Unnamed: 0,player_number,player_name,position,nasionality
0,1,Alisson Becker,goalkeeper,Brazil
1,9,Firminho,forward,Brazil


In [6]:
# Outer merge on player_number: keep every player number found in EITHER
# table; columns with no matching row in the other table are left missing (NaN)
new_df = pd.merge(my_df1, my_df2, on='player_number', how="outer")
new_df

Unnamed: 0,player_number,player_name,position,nasionality
0,1,Alisson Becker,goalkeeper,Brazil
1,4,Virgin van Dijk,defender,
2,9,Firminho,forward,Brazil
3,10,Sadio Mane,forward,
4,23,Xherdan Shaqiri,forward,
5,3,,,Brazil
6,8,,,Guinea
7,11,,,Egypt
