In [54]:
import numpy as np
import seaborn as sb 
import pandas as pd
import matplotlib.pyplot as plt
import scipy as stats

import plotly.graph_objs as go


In [55]:
# Read the export and import CSV files (paths are relative to the working
# directory) into two DataFrames, export first and import second.
data_exp, data_imp = (
    pd.read_csv(csv_path)
    for csv_path in ('2010_2021_HS2_export.csv', '2010_2021_HS2_import.csv')
)

In [56]:
# Show ten randomly chosen rows from each dataset. A fixed random_state makes
# the preview identical on every Restart & Run All instead of changing per run.
print("Preview of export data:")
display(data_exp.sample(n=10, random_state=42))

print("Preview of import data:")
display(data_imp.sample(n=10, random_state=42))

Preview of export data:


Unnamed: 0,HSCode,Commodity,value,country,year
168372,71,"NATURAL OR CULTURED PEARLS,PRECIOUS OR SEMIPRE...",0.03,URUGUAY,2020
46553,82,"TOOLS IMPLEMENTS, CUTLERY, SPOONS AND FORKS, O...",0.51,BURKINA FASO,2013
26555,94,"FURNITURE; BEDDING, MATTRESSES, MATTRESS SUPPO...",12.39,SOUTH AFRICA,2011
179742,36,EXPLOSIVES; PYROTECHNIC PRODUCTS; MATCHES; PYR...,0.38,PANAMA REPUBLIC,2021
148527,36,EXPLOSIVES; PYROTECHNIC PRODUCTS; MATCHES; PYR...,0.11,PORTUGAL,2019
180261,56,"WADDING, FELT AND NONWOVENS; SPACIAL YARNS; TW...",1.32,PORTUGAL,2021
133513,69,CERAMIC PRODUCTS.,1.34,SEYCHELLES,2018
27515,84,"NUCLEAR REACTORS, BOILERS, MACHINERY AND MECHA...",1.4,TAJIKISTAN,2011
48196,27,"MINERAL FUELS, MINERAL OILS AND PRODUCTS OF TH...",1.51,DOMINIC REP,2013
86183,37,PHOTOGRAPHIC OR CINEMATOGRAPHIC GOODS.,0.0,REUNION,2015


Preview of import data:


Unnamed: 0,HSCode,Commodity,value,country,year
86115,61,"ARTICLES OF APPAREL AND CLOTHING ACCESSORIES, ...",0.0,ESTONIA,2020
17460,55,MAN-MADE STAPLE FIBRES.,9.98,BELARUS,2012
27965,84,"NUCLEAR REACTORS, BOILERS, MACHINERY AND MECHA...",0.01,GUYANA,2013
46230,25,SALT; SULPHUR; EARTHS AND STONE; PLASTERING MA...,0.04,MONGOLIA,2015
90517,8,EDIBLE FRUIT AND NUTS; PEEL OR CITRUS FRUIT OR...,,SIERRA LEONE,2020
62243,49,"PRINTED BOOKDS, NEWSPAPERS, PICTURES AND OTHER...",0.0,LUXEMBOURG,2017
79643,26,"ORES, SLAG AND ASH.",17.63,MALAYSIA,2019
62832,73,ARTICLES OF IRON OR STEEL,,MOZAMBIQUE,2017
31208,25,SALT; SULPHUR; EARTHS AND STONE; PLASTERING MA...,131.28,RUSSIA,2013
41934,21,MISCELLANEOUS EDIBLE PREPARATIONS.,0.49,AUSTRALIA,2015


In [57]:
def description(df):
    """Build a one-row-per-column overview of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to summarise. Its index may be anything (for example the
        leftover labels of a filtered frame); rows are sampled by position.

    Returns
    -------
    pandas.DataFrame
        One row per column of ``df`` with, in order: ``Name`` (column label),
        ``dtypes``, ``Missing`` (number of null entries), ``Uniques`` (result
        of ``DataFrame.nunique``), and ``First Value`` / ``Second Value`` /
        ``Third Value`` holding the entries of the first three rows. A sample
        column is filled with ``None`` when ``df`` has fewer rows than that
        position requires.
    """
    summary = pd.DataFrame({
        'Name': list(df.columns),
        'dtypes': df.dtypes.values,
        'Missing': df.isnull().sum().values,
        'Uniques': df.nunique().values,
    })
    # Positional lookup (iloc) instead of label lookup (loc): labels 0/1/2 are
    # not guaranteed to exist once rows have been filtered or re-indexed.
    for position, label in enumerate(['First Value', 'Second Value', 'Third Value']):
        summary[label] = df.iloc[position].values if position < len(df) else None
    return summary

# Summarise both datasets with the same helper so the two tables line up
# column-for-column.
print("Variable description of export data:")
display(description(data_exp))

print("Variable description of import data:")
display(description(data_imp))

Variable discription of export data:


Unnamed: 0,Name,dtypes,Missing,Uniques,First Value,Second Value,Third Value
0,HSCode,int64,0,98,2,3,4
1,Commodity,object,0,98,MEAT AND EDIBLE MEAT OFFAL.,"FISH AND CRUSTACEANS, MOLLUSCS AND OTHER AQUAT...",DAIRY PRODUCE; BIRDS' EGGS; NATURAL HONEY; EDI...
2,value,float64,19258,12944,1.4,0.08,3.89
3,country,object,0,249,AFGHANISTAN,AFGHANISTAN,AFGHANISTAN
4,year,int64,0,12,2010,2010,2010


Variable discription of import data:


Unnamed: 0,Name,dtypes,Missing,Uniques,First Value,Second Value,Third Value
0,HSCode,int64,0,98,7,8,9
1,Commodity,object,0,98,EDIBLE VEGETABLES AND CERTAIN ROOTS AND TUBERS.,EDIBLE FRUIT AND NUTS; PEEL OR CITRUS FRUIT OR...,"COFFEE, TEA, MATE AND SPICES."
2,value,float64,15745,11062,9.14,93.82,2.54
3,country,object,0,243,AFGHANISTAN,AFGHANISTAN,AFGHANISTAN
4,year,int64,0,12,2010,2010,2010


In [58]:
# Data-quality check: for each dataset, show the first few rows whose `value`
# is exactly zero, followed by the per-column count of missing entries.
export_zero_rows = data_exp.loc[data_exp['value'].eq(0)]
import_zero_rows = data_imp.loc[data_imp['value'].eq(0)]

print("Export data with zero values:")
display(export_zero_rows.head())

print("Export data with NAN:")
display(data_exp.isna().sum())

print("Import data with zero values:")
display(import_zero_rows.head())

print("Import data with NAN:")
display(data_imp.isna().sum())

Export data with zero values:


Unnamed: 0,HSCode,Commodity,value,country,year
14,16,"PREPARATIONS OF MEAT, OF FISH OR OF CRUSTACEAN...",0.0,AFGHANISTAN,2010
21,23,RESIDUES AND WASTE FROM THE FOOD INDUSTRIES; P...,0.0,AFGHANISTAN,2010
31,35,ALBUMINOIDAL SUBSTANCES; MODIFIED STARCHES; GL...,0.0,AFGHANISTAN,2010
55,65,HEADGEAR AND PARTS THEREOF.,0.0,AFGHANISTAN,2010
56,66,"UMBRELLAS, SUN UMBRELLAS, WALKING-STICKS, SEAT...",0.0,AFGHANISTAN,2010


Export data with NAN:


HSCode           0
Commodity        0
value        19258
country          0
year             0
dtype: int64

Import data with zero values:


Unnamed: 0,HSCode,Commodity,value,country,year
5,16,"PREPARATIONS OF MEAT, OF FISH OR OF CRUSTACEAN...",0.0,AFGHANISTAN,2010
6,18,COCOA AND COCOA PREPARATIONS.,0.0,AFGHANISTAN,2010
9,27,"MINERAL FUELS, MINERAL OILS AND PRODUCTS OF TH...",0.0,AFGHANISTAN,2010
17,58,SPECIAL WOVEN FABRICS; TUFTED TEXTILE FABRICS;...,0.0,AFGHANISTAN,2010
18,63,OTHER MADE UP TEXTILE ARTICLES; SETS; WORN CLO...,0.0,AFGHANISTAN,2010


Import data with NAN:


HSCode           0
Commodity        0
value        15745
country          0
year             0
dtype: int64

In [59]:
# Clean both datasets the same way, each in a single chain:
#   1. drop rows containing any missing entry,
#   2. normalise the country label 'U S A' to 'USA',
#   3. renumber the index from 0.
# Using .assign on the chained result (rather than assigning a column on the
# frame returned by dropna) avoids pandas' chained-assignment warning.
data_exp = (
    data_exp
    .dropna()
    .assign(country=lambda frame: frame['country'].replace({'U S A': 'USA'}))
    .reset_index(drop=True)
)

data_imp = (
    data_imp
    .dropna()
    .assign(country=lambda frame: frame['country'].replace({'U S A': 'USA'}))
    .reset_index(drop=True)
)

In [60]:
# Sum the `value` column per year for each trade direction.
export_year = (
    data_exp.groupby('year')[['value']]
    .sum()
    .rename(columns={'value': 'Export'})
)
import_year = (
    data_imp.groupby('year')[['value']]
    .sum()
    .rename(columns={'value': 'Import'})
)

# Year-over-year fractional change of each yearly total.
export_year['Growth Rate(E)'] = export_year['Export'].pct_change()
import_year['Growth Rate(I)'] = import_year['Import'].pct_change()

# Place both tables side by side on the shared `year` index.
total_year = pd.concat([export_year, import_year], axis=1)
# Computed as exports minus imports, so a negative entry means imports were
# larger than exports in that year.
total_year['Trade Deficit'] = export_year['Export'] - import_year['Import']

print('Export/Import and Trade Balance of India')
display(total_year)
print('Descriptive statistics')
display(total_year.describe())

Export/Import and Trade Balance of India


Unnamed: 0_level_0,Export,Growth Rate(E),Import,Growth Rate(I),Trade Deficit
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2010,249801.18,,369762.25,,-119961.07
2011,305948.28,0.224767,489311.81,0.323315,-183363.53
2012,300384.32,-0.018186,490730.07,0.002898,-190345.75
2013,314388.61,0.046621,450192.99,-0.082606,-135804.38
2014,310321.02,-0.012938,448026.63,-0.004812,-137705.61
2015,262274.3,-0.154829,381000.97,-0.149602,-118726.67
2016,275835.27,0.051705,384350.29,0.008791,-108515.02
2017,303507.85,0.100323,465574.02,0.211327,-162066.17
2018,330058.64,0.08748,514071.33,0.104167,-184012.69
2019,313341.14,-0.05065,474701.75,-0.076584,-161360.61


Descriptive statistics


Unnamed: 0,Export,Growth Rate(E),Import,Growth Rate(I),Trade Deficit
count,12.0,11.0,12.0,11.0,12.0
mean,306636.203333,0.059246,456266.375,0.065641,-149630.171667
std,43052.939353,0.162889,69331.700692,0.219915,33102.919639
min,249801.18,-0.154829,369762.25,-0.169101,-191061.04
25%,287800.9125,-0.034418,391909.3075,-0.079595,-183525.82
50%,304728.065,0.046621,457883.505,0.002898,-149533.11
75%,313603.0075,0.093901,489666.375,0.157747,-119652.47
max,421984.37,0.446195,613045.41,0.554261,-102639.52


In [61]:
# Bar chart comparing the yearly export and import totals held in total_year.
bar_outline = {'color': 'rgb(0, 0, 0)', 'width': 1.5}

trace1 = go.Bar(
    name="Export",
    x=total_year.index,
    y=total_year['Export'],
    text=total_year['Export'],  # value shown as the bar's text
    marker={'color': 'rgb(55, 83, 109)', 'line': bar_outline},
)

trace2 = go.Bar(
    name="Import",
    x=total_year.index,
    y=total_year['Import'],
    text=total_year['Import'],
    marker={'color': 'rgb(26, 118, 255)', 'line': bar_outline},
)

layout = go.Layout(
    title='Export/Import of Indian Trade from 2010 to 2021',
    hovermode='closest',
    xaxis={'title': 'Year'},
    yaxis={'title': 'USD (millions)'},
)
fig = go.Figure(data=[trace1, trace2], layout=layout)
fig.show()

In [62]:
# Scatter traces of the export and import growth-rate columns, one point per year.
growth_export = total_year['Growth Rate(E)']
growth_import = total_year['Growth Rate(I)']

trace1 = go.Scatter(
    name="Growth Rate(E)",
    x=total_year.index,
    y=growth_export,
    text=growth_export,
    line={'color': 'deepskyblue'},
    opacity=0.8,
)

trace2 = go.Scatter(
    name="Growth Rate(I)",
    x=total_year.index,
    y=growth_import,
    text=growth_import,
    line={'color': 'dimgray'},
    opacity=0.8,
)

layout = go.Layout(
    title='Export/Import Growth Rate of Indian Trade from 2010 to 2021',
    hovermode='closest',
    xaxis={'title': 'Year'},
    yaxis={'title': 'Growth Rate'},
)
fig = go.Figure(data=[trace1, trace2], layout=layout)
fig.show()

In [63]:
# Bar chart of yearly exports, imports, and the 'Trade Deficit' column of
# total_year, all drawn with the same black bar outline.
bar_outline = {'color': 'rgb(0, 0, 0)', 'width': 1.5}

trace1 = go.Bar(
    name='Export',
    x=total_year.index,
    y=total_year['Export'],
    text=total_year['Export'],
    marker={'color': 'rgb(55, 83, 109)', 'line': bar_outline},
)

trace2 = go.Bar(
    name='Import',
    x=total_year.index,
    y=total_year['Import'],
    text=total_year['Import'],
    marker={'color': 'rgb(26, 118, 255)', 'line': bar_outline},
)

trace3 = go.Bar(
    name='Trade Deficit',
    x=total_year.index,
    y=total_year['Trade Deficit'],
    text=total_year['Trade Deficit'],
    marker={'color': 'red', 'line': bar_outline},
)

layout = go.Layout(
    title='Export/Import and Trade Deficit of Indian Trade from 2010 to 2021',
    hovermode='closest',
    xaxis={'title': 'Year'},
    yaxis={'title': 'USD (millions)'},
)
fig = go.Figure(data=[trace1, trace2, trace3], layout=layout)
fig.show()