In [2]:
import pandas as pd
import numpy as np

In [8]:
# Load data
# All four raw CSVs are read with pandas defaults; paths are relative to the
# notebook's working directory. The order of the paths below matches the order
# of the names on the left-hand side.
(
    df_gdp_mainlandchina,  # 'data-gdp-mainlandchina-2019.csv'
    data_gdp_world,        # 'WEO_Data.csv'
    df_pop_china,          # 'data-pop-china-2017.csv'
    df_region_china,       # 'data-economyregion-china.csv'
) = (
    pd.read_csv(csv_path)
    for csv_path in (
        'raw_data/data-gdp-mainlandchina-2019.csv',
        'raw_data/WEO_Data.csv',
        'raw_data/data-pop-china-2017.csv',
        'raw_data/data-economyregion-china.csv',
    )
)

In [6]:
# Merge dataframe for China
# Step 1: right join, so every row of the population table is kept whether or
#         not the GDP table has a matching 'Provinces' entry.
gdp_with_population = df_gdp_mainlandchina.merge(
    df_pop_china,
    how='right',
    left_on='Provinces',
    right_on='Administrative Division',
)
# Step 2: left join, attaching the economy-region attributes to each division.
china_joined = gdp_with_population.merge(
    df_region_china,
    how='left',
    left_on='Administrative Division',
    right_on='Provinces',
)

# Columns whose name starts with 'Unnamed' are discarded. Both joined tables
# carried a 'Provinces' key, which the second merge suffixed to
# 'Provinces_x' / 'Provinces_y'; those are dropped as well because
# 'Administrative Division' already identifies each row.
is_unnamed = china_joined.columns.str.contains('^Unnamed')
df_china = (
    china_joined
    .loc[:, ~is_unnamed]
    .drop(['Provinces_x', 'Provinces_y'], axis=1)
    .assign(Country='China', Year='2019')  # constant columns; Year is stored as text
)

# Suffix every Region value with '-CN' (values are passed through str() first)
df_china['Region'] = df_china['Region'].map(str) + '-CN'

df_china.tail()

Unnamed: 0,Nominal GDP (Billion),Administrative Division,Population,Region,Economic Zone #,Economic Zone,经济区,Characteristic,Country,Year
29,43.0,Qinghai,5980000,West-CN,10,Qinghai-Tibet Plateau,青藏高原经济区,"Natural gas, salt lake resources, non-ferrous ...",China,2019
30,25.0,Tibet,3370000,West-CN,10,Qinghai-Tibet Plateau,青藏高原经济区,"Natural gas, salt lake resources, non-ferrous ...",China,2019
31,,Hong Kong,7335384,Hong Kong-CN,11,Hong Kong,香港特别行政区,"Trade and logistics industry (21.2%), financia...",China,2019
32,,Macau,644900,Macau-CN,12,Macau,澳门港特别行政区,Tourism and apparel industry (41%),China,2019
33,,Taiwan,23562318,Taiwan-CN,13,Taiwan,台澎金马个别关税领域,Agricultural products and OEM,China,2019


In [10]:
# Get GDP value for Hong Kong, Macau and Taiwan
def rows_for_country(weo, name_pattern):
    """Return the rows of ``weo`` whose 'Country' value contains ``name_pattern``.

    The match is case-insensitive and rows with a missing 'Country' are
    excluded. ``name_pattern`` is handed to ``Series.str.contains`` unchanged,
    so it is interpreted as a regular expression (the pandas default).
    """
    return weo[weo['Country'].str.contains(name_pattern, case=False, na=False)]


def current_price_gdp(country_rows, year='2019'):
    """Return the 'Gross domestic product, current prices' value for ``year``.

    Parameters
    ----------
    country_rows : DataFrame
        Rows to search; must have a 'Subject Descriptor' column and a column
        named ``year``.
    year : str
        Name of the column to read the value from.

    Returns
    -------
    float
        The value of the first matching row. It is coerced explicitly because
        the column may have been parsed as text (for example if some entries
        contain thousands separators) -- TODO confirm against the raw file.

    Raises
    ------
    LookupError
        If no row has the expected 'Subject Descriptor'.
    ValueError
        If the matched value cannot be interpreted as a number.
    """
    is_gdp_row = country_rows['Subject Descriptor'] == 'Gross domestic product, current prices'
    candidates = country_rows.loc[is_gdp_row, year]
    if candidates.empty:
        raise LookupError(
            "no 'Gross domestic product, current prices' row found for column {!r}".format(year)
        )
    # When several rows match, the first one is used.
    return float(str(candidates.iloc[0]).replace(',', ''))


df_hongkong = rows_for_country(data_gdp_world, 'hong kong')
hongkong_gdp = current_price_gdp(df_hongkong)

# 'maca' is a deliberately short pattern; presumably it is meant to cover
# alternative spellings of Macau -- verify against the 'Country' column.
df_macau = rows_for_country(data_gdp_world, 'maca')
macau_gdp = current_price_gdp(df_macau)

df_taiwan = rows_for_country(data_gdp_world, 'taiwan')
taiwan_gdp = current_price_gdp(df_taiwan)

print('Hong Kong GDP: ',hongkong_gdp)
print('Macau GDP: ',macau_gdp)
print('Taiwan GDP: ',taiwan_gdp)

Hong Kong GDP:  372.99
Macau GDP:  55.14
Taiwan GDP:  586.1


In [12]:
# Assign value
# Hong Kong, Macau and Taiwan have no 'Nominal GDP (Billion)' value after the
# merge (NaN in the frame), so they are filled from the WEO lookups.
gdp_by_division = {
    'Hong Kong': hongkong_gdp,
    'Macau': macau_gdp,
    'Taiwan': taiwan_gdp,
}
for division_name, gdp_value in gdp_by_division.items():
    is_division = df_china['Administrative Division'] == division_name
    if not is_division.any():
        # Fail loudly instead of silently leaving the GDP empty.
        raise LookupError('{!r} not found in df_china'.format(division_name))
    # Coerce explicitly so the GDP column stays numeric even if the looked-up
    # value arrived as text.
    df_china.loc[is_division, 'Nominal GDP (Billion)'] = float(str(gdp_value).replace(',', ''))

# Calculate GDP per Capita
# GDP is expressed in billions, so scale it back to single units before
# relating it to the head count. Rounded to 0 decimal places.
UNITS_PER_BILLION = 10 ** 9
gdp_per_capita = (df_china['Nominal GDP (Billion)'] / df_china['Population']) * UNITS_PER_BILLION
df_china['GDP per Capita'] = gdp_per_capita.round(0)

# Re-order Columns
# Columns not listed here (e.g. '经济区') are dropped. `.copy()` makes the
# result an independent frame so later cells can modify it safely.
column_order = [
    'Year',
    'Country',
    'Region',
    'Economic Zone #',
    'Economic Zone',
    'Administrative Division',
    'Nominal GDP (Billion)',
    'Population',
    'GDP per Capita',
    'Characteristic',
]
df_china = df_china[column_order].copy()

df_china.head()

Unnamed: 0,Year,Country,Region,Economic Zone #,Economic Zone,Administrative Division,Nominal GDP (Billion),Population,GDP per Capita,Characteristic
0,2019,China,East-CN,4,Southeast Coast,Guangdong,1561.0,111690000,13976.0,"Export-oriented, digesting foreign advanced te..."
1,2019,China,East-CN,3,East Coast,Jiangsu,1444.0,80290000,17985.0,Multifunctional manufacturing: Light industria...
2,2019,China,East-CN,2,North Coast,Shandong,1030.0,100060000,10294.0,High-tech R & D and manufacturing
3,2019,China,East-CN,3,East Coast,Zhejiang,904.0,56570000,15980.0,Multifunctional manufacturing: Light industria...
4,2019,China,Middle-CN,5,Yellow River Upper and Middle Reaches,Henan,787.0,95590000,8233.0,"Coal mining and coal deep processing, natural ..."


In [14]:
# Write the cleaned frame to disk. The row index is written too (index=True is
# the pandas default, spelled out here), so the file starts with an unnamed
# index column.
df_china.to_csv('clean_data/clean_china_by_province.csv', index=True)