In [2]:
import numpy as np
import pandas as pd

In [3]:
# Load the raw export table from the Excel workbook (relative path).
EXPORT_XLSX = 'india_export_list.xlsx'
df = pd.read_excel(EXPORT_XLSX)

In [4]:
# Preview the first five rows of the raw export table.
df.head(n=5)

Unnamed: 0,Country,Value,Year
0,United States,$34.83B,2017
1,United Arab Emirates,$20.78B,2017
2,Hong Kong,$10.78B,2017
3,China,$9.10B,2017
4,Singapore,$7.52B,2017


In [5]:
# Keep only the 2017 export records; rows for any other year are dropped.
# .copy() makes the result an independent frame, so adding columns to it
# later does not trigger pandas' SettingWithCopyWarning.
df = df[df['Year'] == 2017].copy()

In [6]:
# Country names and their three-letter codes come from Plotly's
# country-wise GDP sample CSV hosted on GitHub.
CODES_CSV_URL = (
    'https://raw.githubusercontent.com/plotly/datasets/'
    'master/2014_world_gdp_with_codes.csv'
)
codes_df = pd.read_csv(CODES_CSV_URL)

In [7]:
# Preview the first five rows of the country-code table.
codes_df.head(n=5)

Unnamed: 0,COUNTRY,GDP (BILLIONS),CODE
0,Afghanistan,21.71,AFG
1,Albania,13.4,ALB
2,Algeria,227.8,DZA
3,American Samoa,0.75,ASM
4,Andorra,4.8,AND


In [8]:
# Count missing values per column of the export table.
df.isnull().sum()

Country    0
Value      0
Year       0
dtype: int64

In [9]:
# Only the country name and its code are needed; discard the GDP figures.
codes_df = codes_df.drop('GDP (BILLIONS)', axis=1)

In [10]:
# Rename the columns of codes_df by name rather than by position, so the
# labels stay correct even if the column order of the source CSV changes.
codes_df = codes_df.rename(columns={'COUNTRY': 'Country', 'CODE': 'Code'})

In [11]:
# Build a lower-cased 'country' column in both tables to use as the join key.
# .str.lower() is the vectorised form of the per-row lambda, and assign()
# returns a new frame instead of writing into a possibly-sliced one.
df = df.assign(country=df['Country'].str.lower())
codes_df = codes_df.assign(country=codes_df['Country'].str.lower())

In [12]:
# Inner join on the lower-cased country name: only countries present in
# both tables survive, so a name spelled differently in the two sources
# is silently left out of export_df.
export_df = df.merge(codes_df, on='country', how='inner')
export_df.head()

Unnamed: 0,Country_x,Value,Year,country,Country_y,Code
0,United States,$34.83B,2017,united states,United States,USA
1,United Arab Emirates,$20.78B,2017,united arab emirates,United Arab Emirates,ARE
2,Hong Kong,$10.78B,2017,hong kong,Hong Kong,HKG
3,China,$9.10B,2017,china,China,CHN
4,Singapore,$7.52B,2017,singapore,Singapore,SGP


In [13]:
# The join key and the duplicate country name from codes_df are no longer needed.
helper_columns = ['country', 'Country_y']
export_df = export_df.drop(helper_columns, axis=1)
export_df.head()

Unnamed: 0,Country_x,Value,Year,Code
0,United States,$34.83B,2017,USA
1,United Arab Emirates,$20.78B,2017,ARE
2,Hong Kong,$10.78B,2017,HKG
3,China,$9.10B,2017,CHN
4,Singapore,$7.52B,2017,SGP


In [14]:
# Give the merged table short lower-case column names. Mapping old name to
# new name (instead of assigning a positional list) keeps each label tied
# to the right column regardless of column order.
export_df = export_df.rename(columns={
    'Country_x': 'country',
    'Value': 'export',
    'Year': 'year',
    'Code': 'code',
})
export_df.head()

Unnamed: 0,country,export,year,code
0,United States,$34.83B,2017,USA
1,United Arab Emirates,$20.78B,2017,ARE
2,Hong Kong,$10.78B,2017,HKG
3,China,$9.10B,2017,CHN
4,Singapore,$7.52B,2017,SGP


In [15]:
# Which magnitude suffixes occur? Collect the last character of every value.
last_chars = export_df['export'].map(lambda raw: str(raw)[-1])
last_chars.unique()

array(['B', 'M', 'K'], dtype=object)

In [16]:
def clean_export(x):
    """Convert a formatted dollar amount to a float in billions of USD.

    Accepted forms are an optional leading '$', a number, and an optional
    magnitude suffix: 'B' (billions), 'M' (millions) or 'K' (thousands).
    A value with no suffix is taken to be plain dollars. Surrounding
    whitespace, thousands separators (',') and lower-case suffixes are
    tolerated.

    Parameters
    ----------
    x : str or any object whose ``str()`` has the form above
        e.g. '$34.83B', '$500M', '$250K', '$1000000000'.

    Returns
    -------
    float
        The amount expressed in billions of US dollars.

    Raises
    ------
    ValueError
        If the value is empty or its numeric part cannot be parsed.
    """
    # Divisors that bring each magnitude suffix onto a billions scale.
    divisors = {'B': 1, 'M': 1000, 'K': 1000000}

    # Strip the currency sign only when it is actually there; blindly
    # dropping the first character would eat a digit of an unprefixed value.
    text = str(x).strip().lstrip('$').replace(',', '').strip()
    if not text:
        raise ValueError('empty export value: {!r}'.format(x))

    suffix = text[-1].upper()
    if suffix in divisors:
        return float(text[:-1]) / divisors[suffix]
    # No recognised suffix: the figure is in plain dollars.
    return float(text) / 1000000000

In [17]:
# Replace the formatted export strings with floats in billions of USD.
# The column is overwritten, so running this cell a second time would feed
# the already-converted floats back through clean_export.
export_in_billions = export_df['export'].map(clean_export)
export_df['export'] = export_in_billions
export_df.head()

Unnamed: 0,country,export,year,code
0,United States,34.83,2017,USA
1,United Arab Emirates,20.78,2017,ARE
2,Hong Kong,10.78,2017,HKG
3,China,9.1,2017,CHN
4,Singapore,7.52,2017,SGP


In [18]:
# Five largest export destinations, biggest first.
export_df.sort_values('export', ascending=False).head(5)

Unnamed: 0,country,export,year,code
0,United States,34.83,2017,USA
1,United Arab Emirates,20.78,2017,ARE
2,Hong Kong,10.78,2017,HKG
3,China,9.1,2017,CHN
4,Singapore,7.52,2017,SGP


In [17]:
# Persist the cleaned table; the row index is not written to the file.
CLEANED_CSV = 'export_cleaned.csv'
export_df.to_csv(CLEANED_CSV, index=False)

In [19]:
# Total Indian exports: sum of the export column (billions of USD) over
# the rows that are present in export_df.
total_export = export_df['export'].sum()
total_export

209.11915393000001

In [18]:
import plotly
import plotly.graph_objs as go
from plotly import tools

In [19]:
# Initialise plotly's offline mode for use inside the notebook.
# NOTE(review): connected=True presumably loads plotly.js from the network
# instead of embedding it in the notebook — confirm against the plotly docs.
plotly.offline.init_notebook_mode(connected=True)

In [20]:
# A single choropleth trace: countries are located by their three-letter
# code, shaded by export value, and carry the country name as text.
choropleth_trace = {
    'type': 'choropleth',
    'locations': export_df['code'],
    'z': export_df['export'],
    'text': export_df['country'],
    # Optional styling, currently disabled:
    # 'colorscale': 'RdBu',
    # 'reversescale': True,
    'colorbar': {
        'tickprefix': '$',
        'title': 'Export<br>Billions US$',
    },
}
data = [choropleth_trace]

# Figure layout: title plus a frameless world map.
layout = dict(
    title = 'Export to Countries around the World by India in USD Billions',
    geo = dict(
        showframe = False,
        #showcoastlines = False,
        projection = dict(
            # plotly's projection type names are lowercase; the capitalised
            # 'Mercator' is not a valid value and is only accepted when the
            # figure is plotted with validation switched off.
            type = 'mercator'
        )
    )
)

# Bundle the trace list and the layout into one figure dict and render it
# inline, with plotly's figure validation switched off.
figure = {'data': data, 'layout': layout}
plotly.offline.iplot(figure, validate=False)

In [21]:
# Produce the same figure as an HTML <div> string (the cell's output),
# without bundling plotly.js and without the plot.ly link.
plotly.offline.plot(
    {'data': data, 'layout': layout},
    output_type='div',
    include_plotlyjs=False,
    show_link=False,
    validate=False,
)

'<div id="44a0f817-5d2e-4311-a332-f9eb7a57e366" style="height: 100%; width: 100%;" class="plotly-graph-div"></div><script type="text/javascript">window.PLOTLYENV=window.PLOTLYENV || {};window.PLOTLYENV.BASE_URL="https://plot.ly";Plotly.newPlot("44a0f817-5d2e-4311-a332-f9eb7a57e366", [{"type": "choropleth", "locations": ["USA", "ARE", "HKG", "CHN", "SGP", "GBR", "DEU", "VNM", "BGD", "BEL", "NPL", "ITA", "NLD", "MYS", "SAU", "TUR", "FRA", "JPN", "LKA", "ZAF", "AUS", "ESP", "MEX", "IDN", "THA", "ISR", "BRA", "IRN", "OMN", "EGY", "CAN", "NGA", "RUS", "KEN", "PHL", "PAK", "POL", "TZA", "KWT", "QAT", "IRQ", "MUS", "COL", "MOZ", "CHE", "SDN", "SWE", "DNK", "CHL", "PER", "DZA", "ETH", "PRT", "SEN", "ARG", "AFG", "GHA", "BHR", "UGA", "IRL", "BEN", "YEM", "BTN", "SOM", "TGO", "JOR", "AUT", "MAR", "CZE", "HUN", "GRC", "NOR", "ROU", "GIN", "NZL", "UKR", "GTM", "FIN", "ECU", "ZMB", "LBR", "SVN", "DJI", "TUN", "LBN", "AGO", "PAN", "MWI", "GIB", "MDV", "CMR", "MLT", "MDG", "DOM", "HRV", "ZWE", "BGR",

In [23]:
# create svg file
#plotly.offline.iplot(dict(data = data, layout = layout), validate=False, show_link=False, image='svg')