In [1]:
import pandas as pd
import numpy as np


## Series


In [2]:
label = ['a', 'b', 'c']
my_data = [10, 20, 30]
dt = pd.Series(data=my_data, index=label, dtype=np.int32)
print(dt)
# Print the data as a table
dt.to_frame()


a    10
b    20
c    30
dtype: int32


Unnamed: 0,0
a,10
b,20
c,30


## DataFrame


In [3]:
row = ['John', 'Bob', 'Alice', 'Mary', 'Mike']
col = ['age', 'state', 'score', 'grade']
data = [[23, 'CA', 90, 'A'], [20, 'NY', 80, 'B'], [
    25, 'TX', 70, 'C'], [27, 'FL', 60, 'D'], [30, 'WA', 50, 'F']]
df = pd.DataFrame(data=data, index=row, columns=col)
print(df, '\n')
df['gender'] = ['M', 'M', 'F', 'F', 'M']
print(df.loc[['John']], '\n')
print(df.info(), '\n')
print(df.describe())


       age state  score grade
John    23    CA     90     A
Bob     20    NY     80     B
Alice   25    TX     70     C
Mary    27    FL     60     D
Mike    30    WA     50     F 

      age state  score grade gender
John   23    CA     90     A      M 

<class 'pandas.core.frame.DataFrame'>
Index: 5 entries, John to Mike
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   age     5 non-null      int64 
 1   state   5 non-null      object
 2   score   5 non-null      int64 
 3   grade   5 non-null      object
 4   gender  5 non-null      object
dtypes: int64(2), object(3)
memory usage: 412.0+ bytes
None 

             age      score
count   5.000000   5.000000
mean   25.000000  70.000000
std     3.807887  15.811388
min    20.000000  50.000000
25%    23.000000  60.000000
50%    25.000000  70.000000
75%    27.000000  80.000000
max    30.000000  90.000000


# Handling Missing Data


In [4]:
# Mock dataframe for Nan value
df2 = pd.DataFrame({'A': [1, 2, np.nan], 'B': [5, np.nan, np.nan],
                    'C': [1, 2, 3]})
print(df2, '\n')

# Drop the row with Nan value
print(df2.dropna(axis=1, thresh=2), '\n')

# Fill the Nan value with 0
print(df2.fillna(value=0), '\n')


     A    B  C
0  1.0  5.0  1
1  2.0  NaN  2
2  NaN  NaN  3 

     A  C
0  1.0  1
1  2.0  2
2  NaN  3 

     A    B  C
0  1.0  5.0  1
1  2.0  0.0  2
2  0.0  0.0  3 



# Merging data


In [5]:
# Vietnam have many famous food,such as Pho, Bun Cha, Banh Mi, Banh Xeo, etc.
# Create a dataframe to store the information of these food, cost, and the place where you can find them.
df_food = pd.DataFrame({'Food': ['Pho', 'Bun Cha', 'Banh Mi', 'Banh Xeo'],
                        'Cost': [2, 3, 1, 2],
                        'Place': ['Ha Noi', 'Ha Noi', 'Ho Chi Minh', 'Da Nang']})
# make a unit for cost
df_food['Cost'] = df_food['Cost'].apply(lambda x: str(x) + ' USD')
# Food is the index
# df_food.set_index('Food', inplace=True)
print(df_food, '\n')

# Create another dataframe about place and its scenic spots, one place can have many scenic spots.
df_place = pd.DataFrame({'Place': ['Ha Noi', 'Ho Chi Minh', 'Da Nang'],
                         'Scenic Spots': ['West Lake',
                                          'Ben Thanh Market',
                                          'My Khe Beach']})

# df_place.set_index('Place', inplace=True)
print(df_place, '\n')

# Merge two dataframe
df_merge = pd.merge(df_food, df_place, on='Place')

print(df_merge, '\n')
# Group by Place and count the number of food in each place, print just the number of food, and name the column as 'Number of Food'
print(df_merge.groupby('Cost').min(), '\n')
# print df_merge with all columns in same row

       Food   Cost        Place
0       Pho  2 USD       Ha Noi
1   Bun Cha  3 USD       Ha Noi
2   Banh Mi  1 USD  Ho Chi Minh
3  Banh Xeo  2 USD      Da Nang 

         Place      Scenic Spots
0       Ha Noi         West Lake
1  Ho Chi Minh  Ben Thanh Market
2      Da Nang      My Khe Beach 

       Food   Cost        Place      Scenic Spots
0       Pho  2 USD       Ha Noi         West Lake
1   Bun Cha  3 USD       Ha Noi         West Lake
2   Banh Mi  1 USD  Ho Chi Minh  Ben Thanh Market
3  Banh Xeo  2 USD      Da Nang      My Khe Beach 

           Food        Place      Scenic Spots
Cost                                          
1 USD   Banh Mi  Ho Chi Minh  Ben Thanh Market
2 USD  Banh Xeo      Da Nang      My Khe Beach
3 USD   Bun Cha       Ha Noi         West Lake 

