# World Economic Outlook Data Analysis
Data Source : IMF

In [None]:
import pandas as pd
df = pd.read_csv('WEOApr2020all.csv')
df.head(3)

In [None]:
df.describe()

In [None]:
df.columns

In [None]:
type(df)

In [None]:
df['Country'].nunique()

In [None]:
df.iloc[:,::3]

In [None]:
df['Subject Descriptor'].head(7)

In [None]:
df[['Country','Subject Descriptor','2020']]

# Inflation

In [None]:
idx_inf = df['Subject Descriptor'].str.contains('Inflation, end of')
print(idx_inf)

In [None]:
df.loc[idx_inf]

In [None]:
df_inf = df[idx_inf]
df_inf.head(3)

In [None]:
df_inf_2021 = df_inf[['Country','2021']]
df_inf_2021

In [None]:
df_inf_2021.reset_index(drop=True, inplace=True)
df_inf_2021


In [None]:
df_inf_2021.dtypes

In [None]:
df_inf_2021.info()

In [None]:
# get only non-null data type
df_inf_2021 = df_inf_2021.loc[df_inf_2021['2021'].notnull()]
df_inf_2021

In [None]:
df_inf_2021.info()

In [None]:
df_inf_2021 = df_inf_2021.replace(',','',regex=True)
df_inf_2021

In [None]:
# change to numeric
df_inf_2021['2021'] = pd.to_numeric(df_inf_2021['2021'])
df_inf_2021

In [None]:
df_inf_2021.info()

In [None]:
# plot bar graph
df_inf_2021.sort_values('2021').plot.bar(x='Country')

In [None]:
df_inf_2021.sort_values('2021').iloc[0:20,:].plot.bar(x='Country')

# Unemployment Rate

In [None]:
df_ur = df[ df['Subject Descriptor'].str.contains('Unemployment')][['Country','2021']]
df_ur

In [None]:
df_ur.info()

In [None]:
df_ur = df_ur.loc[df_ur['2021'].notnull()]
df_ur.info()

In [None]:
df_ur.reset_index(drop=True, inplace=True)
df_ur

In [None]:
df_ur.info()

In [None]:
df_ur['2021'] = pd.to_numeric(df_ur['2021'])
df_ur

In [None]:
df_ur.info()

In [None]:
df_ur.plot.bar(x='Country')

In [None]:
df_ur.sort_values('2021', ascending=False).plot.bar(
    x='Country', 
    title='Unemployment Rate', 
    figsize=(15,5)
    )

In [None]:
df_ur_numpy = df_ur.sort_values('2021', ascending=False).to_numpy()
print(df_ur_numpy)

In [None]:
import matplotlib.pyplot as plt
import numpy as np

In [None]:
plt.rcParams['figure.figsize'] =(20,3)
plt.xticks(rotation='vertical')
plt.bar(df_ur_numpy[:,0], df_ur_numpy[:,1])
plt.title("2021 Unemployment Rate - IMF World Eonomic Outlook Database, April 2020")
# color on Korea
idx = np.where(df_ur_numpy == 'Korea')
plt.bar(df_ur_numpy[idx[0],0], df_ur_numpy[idx[0], 1], label = 'Korea')
# color on United States
idx = np.where(df_ur_numpy == 'United States')
plt.bar(df_ur_numpy[idx[0],0], df_ur_numpy[idx[0], 1], label = 'United States')
# color on China
idx = np.where(df_ur_numpy == 'China')
plt.bar(df_ur_numpy[idx[0],0], df_ur_numpy[idx[0], 1], label = 'China')
plt.legend()

In [None]:
df.head(3)

In [None]:
len(df.columns)

In [None]:
idx = list(range(0,52))
print(idx)

In [None]:
idx[3]=0
idx[0]=3
print(idx) 

In [None]:
df.iloc[:,idx] # 순서가 바뀐 idx 값을 인자로 받음

# Add new column and group analysis

In [46]:
df_ur

Unnamed: 0,Country,2021
0,Albania,11.500
1,Algeria,13.909
2,Argentina,10.084
3,Armenia,18.389
4,Aruba,7.458
...,...,...
95,Turkey,15.567
96,Ukraine,9.318
97,United Kingdom,4.375
98,United States,9.135


In [48]:
df_ur['Criteria'] = 'NONE'
df_ur

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_ur['Criteria'] = 'NONE'


Unnamed: 0,Country,2021,Criteria
0,Albania,11.500,NONE
1,Algeria,13.909,NONE
2,Argentina,10.084,NONE
3,Armenia,18.389,NONE
4,Aruba,7.458,NONE
...,...,...,...
95,Turkey,15.567,NONE
96,Ukraine,9.318,NONE
97,United Kingdom,4.375,NONE
98,United States,9.135,NONE


In [51]:
df_ur.loc[df_ur['2021'] < 5, 'Criteria'] = 'Low'
df_ur.loc[(df_ur['2021'] >= 5) & (df_ur['2021'] < 10), 'Criteria'] = 'Medium'
df_ur.loc[df_ur['2021'] >= 10, 'Criteria'] = 'High'
df_ur

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)


Unnamed: 0,Country,2021,Criteria
0,Albania,11.500,High
1,Algeria,13.909,High
2,Argentina,10.084,High
3,Armenia,18.389,High
4,Aruba,7.458,Medium
...,...,...,...
95,Turkey,15.567,High
96,Ukraine,9.318,Medium
97,United Kingdom,4.375,Low
98,United States,9.135,Medium


In [52]:
df_ur.groupby(['Criteria']).mean()

Unnamed: 0_level_0,2021
Criteria,Unnamed: 1_level_1
High,14.76768
Low,3.547
Medium,7.100353


In [53]:
df_ur.groupby(['Criteria']).mean().sort_values('2021')

Unnamed: 0_level_0,2021
Criteria,Unnamed: 1_level_1
Low,3.547
Medium,7.100353
High,14.76768


In [54]:
df_ur.groupby(['Criteria']).count()

Unnamed: 0_level_0,Country,2021
Criteria,Unnamed: 1_level_1,Unnamed: 2_level_1
High,25,25
Low,24,24
Medium,51,51


In [None]:
for df_chunk in pd.read_csv('WEOApr2020all.csv', chunksize=5):
    print(df_chunk)


In [None]:
df_new = pd.DataFrame(columns=df.columns)
df_new

In [58]:
for df_chunk in pd.read_csv('WEOApr2020all.csv', chunksize=5):
    temp = df_chunk.loc[ df_chunk['Subject Descriptor'] == 'Unemployment rate']
    df_new = pd.concat([df_new, temp])

In [None]:
df_new