In [68]:
import pandas as pd
import chart_studio.plotly as py
import plotly.graph_objs as go 
import seaborn as sns
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
%matplotlib inline

In [2]:
# Load the raw visitor log.
# The Country column holds two-letter country codes, and pandas' default
# missing-value markers include the literal string 'NA' -- which is also the
# alpha-2 code for Namibia. Default NA parsing is therefore switched off and
# only genuinely empty fields are treated as missing.
df = pd.read_csv(
    '186_399_compressed_visitor-interests.csv.zip',
    keep_default_na=False,
    na_values=[''],
)
df.head(5)

Unnamed: 0,IP,UserAgent,Country,Languages,Interests
0,1.10.195.126,Mozilla/5.0 (Windows NT 6.1; Win64; x64) Apple...,TH,english,other
1,1.1.217.211,Mozilla/5.0 (Windows NT 10.0; Win64; x64) Appl...,TH,thai,blog/personal homepage
2,1.115.198.107,Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.3...,JP,english,shop/online shopping website
3,1.121.152.143,Mozilla/5.0 (Linux; Android 7.0; Nexus 9 Build...,AU,english,company
4,1.123.135.213,Mozilla/5.0 (iPad; CPU OS 9_3_5 like Mac OS X)...,AU,english,company


In [3]:
# Scrape the country-code lookup table from the web page. read_html returns a
# list of DataFrames (one per HTML table found); the code table is used as
# element 0 below.
# keep_default_na=False keeps Namibia's alpha-2 code 'NA' as a plain string
# instead of letting pandas' default missing-value markers turn it into NaN.
df_code = pd.read_html(
    'https://www.iban.com/country-codes',
    keep_default_na=False,
)
df_code[0].head()

Unnamed: 0,Country,Alpha-2 code,Alpha-3 code,Numeric
0,Afghanistan,AF,AFG,4
1,Åland Islands,AX,ALA,248
2,Albania,AL,ALB,8
3,Algeria,DZ,DZA,12
4,American Samoa,AS,ASM,16


In [4]:
# Build the lookup used for the join: keep the name and both code columns,
# and rename so that the alpha-2 code column is called 'Country' (matching the
# visitor log) while the original 'Country' column becomes 'Country full name'.
dx = (
    df_code[0][['Country', 'Alpha-2 code', 'Alpha-3 code']]
    .rename(columns={
        'Country': 'Country full name',
        'Alpha-2 code': 'Country',
    })
)

In [80]:
# Attach the full country name and alpha-3 code to each visitor row by joining
# on the shared 'Country' (alpha-2) column. The join is an inner join, so rows
# whose code has no match in the lookup are not carried over. Afterwards any
# row that still contains a missing value in any column is discarded.
df_all = (
    df.merge(dx, how='inner', on='Country')
    .dropna()
)
df_all.head()

Unnamed: 0,IP,UserAgent,Country,Languages,Interests,Country full name,Alpha-3 code
0,1.10.195.126,Mozilla/5.0 (Windows NT 6.1; Win64; x64) Apple...,TH,english,other,Thailand,THA
1,1.1.217.211,Mozilla/5.0 (Windows NT 10.0; Win64; x64) Appl...,TH,thai,blog/personal homepage,Thailand,THA
2,1.20.173.38,Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKi...,TH,english,internet directory/search engine,Thailand,THA
3,1.20.175.155,Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKi...,TH,chinese,social network/forum,Thailand,THA
4,1.20.175.187,Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7....,TH,thai,torrent tracker,Thailand,THA


In [81]:
# Count non-null entries per alpha-3 country code. Every remaining column is
# counted; the 'IP' column of the result is what the map below uses as the
# per-country figure.
df_IP = (
    df_all
    .groupby(by='Alpha-3 code', as_index=False)
    .count()
)
df_IP.head()

Unnamed: 0,Alpha-3 code,IP,UserAgent,Country,Languages,Interests,Country full name
0,ABW,1,1,1,1,1,1
1,AFG,2,2,2,2,2,2
2,AGO,1,1,1,1,1,1
3,ALB,22,22,22,22,22,22
4,ARE,86,86,86,86,86,86


In [82]:
# Choropleth trace description: one location per alpha-3 country code, shaded
# (z) and labelled (text) with the per-country count taken from df_IP['IP'].
data = {
    'type': 'choropleth',
    'locations': df_IP['Alpha-3 code'],
    'z': df_IP['IP'],
    'text': df_IP['IP'],
    'colorbar': {'title': 'Number of visitor'},
    'colorscale': 'Oranges',
}

In [83]:
# Figure layout: chart title plus map settings (no frame around the map,
# equirectangular projection).
layout = {
    'title': 'Number of Visitor',
    'geo': {
        'showframe': False,
        'projection': {'type': 'equirectangular'},
    },
}

In [84]:
# Combine the single choropleth trace with the layout into a figure and
# render it in the notebook.
choromap = go.Figure(
    data=[data],
    layout=layout,
)
iplot(choromap)

In [85]:
# Most common visitor languages: count rows per language, show the top five.
df_bylang = (
    df_all
    .groupby('Languages', as_index=False)['IP']
    .count()
)
df_bylang.sort_values(by='IP', ascending=False).head()

Unnamed: 0,Languages,IP
25,english,12065
113,russian,1248
13,chinese,564
44,french,431
134,ukrainian,206


In [86]:
# Most common visitor interests: count rows per interest, show the top five.
df_byint = (
    df_all
    .groupby('Interests', as_index=False)['IP']
    .count()
)
df_byint.sort_values(by='IP', ascending=False).head()

Unnamed: 0,Interests,IP
57,company,5824
32,blog/personal homepage,1690
281,social network/forum,1413
67,company/other,1072
203,other,940


In [87]:
# Top five countries among visitors whose language is 'english', ranked by
# the per-country row count in the 'IP' column.
(
    df_all
    .loc[lambda frame: frame['Languages'] == 'english']
    .groupby('Country full name')
    .count()
    .sort_values(by='IP', ascending=False)
    .head(5)
)

Unnamed: 0_level_0,IP,UserAgent,Country,Languages,Interests,Alpha-3 code
Country full name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
United States of America (the),4376,4376,4376,4376,4376,4376
India,1134,1134,1134,1134,1134,1134
United Kingdom of Great Britain and Northern Ireland (the),1023,1023,1023,1023,1023,1023
Canada,512,512,512,512,512,512
Germany,356,356,356,356,356,356


In [88]:
# Top five countries among visitors whose language is 'russian', ranked by
# the per-country row count in the 'IP' column.
(
    df_all
    .loc[lambda frame: frame['Languages'] == 'russian']
    .groupby('Country full name')
    .count()
    .sort_values(by='IP', ascending=False)
    .head(5)
)

Unnamed: 0_level_0,IP,UserAgent,Country,Languages,Interests,Alpha-3 code
Country full name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Ukraine,564,564,564,564,564,564
Russian Federation (the),409,409,409,409,409,409
Germany,35,35,35,35,35,35
Romania,28,28,28,28,28,28
Kazakhstan,23,23,23,23,23,23


In [89]:
# Top five countries among visitors whose language is 'chinese', ranked by
# the per-country row count in the 'IP' column.
(
    df_all
    .loc[lambda frame: frame['Languages'] == 'chinese']
    .groupby('Country full name')
    .count()
    .sort_values(by='IP', ascending=False)
    .head(5)
)

Unnamed: 0_level_0,IP,UserAgent,Country,Languages,Interests,Alpha-3 code
Country full name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
United States of America (the),187,187,187,187,187,187
China,94,94,94,94,94,94
Taiwan (Province of China),32,32,32,32,32,32
Hong Kong,27,27,27,27,27,27
Viet Nam,19,19,19,19,19,19


In [90]:
# Top five countries among visitors whose language is 'french', ranked by
# the per-country row count in the 'IP' column.
(
    df_all
    .loc[lambda frame: frame['Languages'] == 'french']
    .groupby('Country full name')
    .count()
    .sort_values(by='IP', ascending=False)
    .head(5)
)

Unnamed: 0_level_0,IP,UserAgent,Country,Languages,Interests,Alpha-3 code
Country full name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
France,210,210,210,210,210,210
United States of America (the),56,56,56,56,56,56
Morocco,46,46,46,46,46,46
South Africa,21,21,21,21,21,21
Iceland,10,10,10,10,10,10
