In [1]:
from pathlib import Path

import folium
import pandas as pd
import plotly.express as px

In [8]:
# Count tables and the mentions table read from data/ (French variants, per the file names).
df_city_count_data, df_country_count_data, df_german = map(
    pd.read_csv,
    (
        'data/df_city_count_data_french.csv',
        'data/df_translated_country_count_data_french.csv',
        'data/df_french_with_mentions.csv',
    ),
)

# Country and city reference tables read from inputdata/.
# NOTE(review): the "*_german" variable names hold the French files -- the names
# look like a leftover from a German version of this notebook; confirm before renaming.
df_countries_german, df_cities_german = map(
    pd.read_csv,
    (
        'inputdata/df_countries_french_with_capital_and_coordinates.csv',
        'inputdata/df_cities_french_with_coordinates.csv',
    ),
)

In [9]:
# Display the per-city count table loaded from data/df_city_count_data_french.csv.
# NOTE(review): in the rendered output, geneve's 'Coordinates' string does not
# agree with its Longitude/Latitude columns, and berne carries the same
# 'Coordinates' value as geneve -- verify the geocoding upstream before
# trusting marker positions on the maps below.
df_city_count_data

Unnamed: 0,City,Count,Coordinates,Longitude,Latitude
0,geneve,101,"(47.553808, 7.592036)",6.143889,46.201511
1,zurich,49,"(47.373754, 8.537087)",8.537087,47.373754
2,berne,45,"(47.553808, 7.592036)",7.592036,47.553808
3,lausanne,44,"(46.520381, 6.63141)",6.631410,46.520381
4,fribourg,34,"(46.805487, 7.162118)",7.162118,46.805487
...,...,...,...,...,...
75,schlieren,1,"(47.39668, 8.44763)",8.447630,47.396680
76,weinfelden,1,"(47.566894, 9.104073)",9.104073,47.566894
77,lenzbourg,1,"(47.388748, 8.17853)",8.178530,47.388748
78,horgen,1,"(47.261948, 8.596927)",8.596927,47.261948


In [10]:
# Display the per-country count table loaded from
# data/df_translated_country_count_data_french.csv.
# NOTE(review): the rendered output shows montenegro with "(None, None)"
# coordinates (NaN Longitude/Latitude) and suisse at latitude 53.16, which is
# presumably a geocoding error -- verify upstream before using these coordinates.
df_country_count_data

Unnamed: 0,FrenchName,Count,Coordinates,Longitude,Latitude,ISO2,ISO3,EnglishName,Capital
0,suisse,398,"(53.156582, 8.385292)",8.385292,53.156582,CH,CHE,Switzerland,berne (de facto)
1,etats-unis,130,"(38.82652, -77.01712)",-77.017120,38.826520,US,USA,United States of America,washington
2,france,121,"(48.858705, 2.342865)",2.342865,48.858705,FR,FRA,France,paris
3,russie,42,"(55.741469, 37.615561)",37.615561,55.741469,RU,RUS,Russian Federation,moscou
4,royaume-uni,37,"(51.509648, -0.099076)",-0.099076,51.509648,GB,GBR,United Kingdom of Great Britain and Northern I...,londres
...,...,...,...,...,...,...,...,...,...
126,guatemala,1,"(14.64072, -90.51327)",-90.513270,14.640720,GT,GTM,Guatemala,guatemala
127,guinee equatoriale,1,"(3.75, 8.78333)",8.783330,3.750000,GQ,GNQ,Equatorial Guinea,malabo
128,afghanistan,1,"(34.52813, 69.17233)",69.172330,34.528130,AF,AFG,Afghanistan,kaboul
129,montenegro,1,"(None, None)",,,ME,MNE,Montenegro,cetinje (presidentielle)


**Choropleth map for country counts**


In [11]:
# Choropleth coloured by the "Count" column; countries are located through the
# "ISO3" column, and the hover box is titled with the English country name.
fig_country = px.choropleth(
    data_frame=df_country_count_data,
    locations="ISO3",
    color="Count",
    color_continuous_scale=px.colors.sequential.Plasma,
    hover_name="EnglishName",
    hover_data=["FrenchName", "Capital", "Count"],
)
fig_country.show()

In [6]:
# Column dtypes of the country table. A bare last expression lets Jupyter
# render the result as the cell output instead of routing it through print().
df_country_count_data.dtypes

FrenchName      object
Count            int64
Coordinates     object
Longitude      float64
Latitude       float64
ISO2            object
ISO3            object
EnglishName     object
Capital         object
dtype: object


**Scatter plot for Swiss city counts**

In [7]:
# Bubble map of the cities: one marker per row, sized by "Count", drawn on the
# Europe scope and centred on the coordinates given below (Switzerland).
fig_city = px.scatter_geo(
    data_frame=df_city_count_data,
    lat="Latitude",
    lon="Longitude",
    size="Count",
    hover_name="City",
    scope="europe",
    center=dict(lat=46.8182, lon=8.2275),
)
fig_city.show()

**Create a map centered around Switzerland**


In [22]:
# Folium map centred on Switzerland with one circle marker per city; the result
# is written to an HTML file.
switzerland_map = folium.Map(location=[46.8182, 8.2275], zoom_start=7)

# folium refuses NaN coordinates, so only plot cities that have both values;
# otherwise a single un-geocoded row would abort the whole loop.
cities_with_coords = df_city_count_data.dropna(subset=['Latitude', 'Longitude'])

for city in cities_with_coords.itertuples(index=False):
    folium.CircleMarker(
        location=[city.Latitude, city.Longitude],
        radius=city.Count / 5,  # marker radius scaled down from the raw count
        color='blue',
        fill=True,
        fill_color='blue',
        fill_opacity=0.6,
        # f-string instead of "+" so a non-string city value cannot raise TypeError
        popup=f"{city.City}: {city.Count}",
    ).add_to(switzerland_map)

# Create the target folder first: saving fails if 'output/' does not exist yet.
output_path = Path('output/switzerland_scatterplot.html')
output_path.parent.mkdir(parents=True, exist_ok=True)
switzerland_map.save(str(output_path))


**Interactive bar chart of all countries, sorted by count**

In [9]:
# Bar chart of every country, ordered from highest to lowest count.
# hover_data must name columns that exist in df_country_count_data: this frame
# has "FrenchName" and "Capital" (the same columns the choropleth cell uses),
# not "GermanName"/"CapitalNameGerman".
fig_bar_country = px.bar(df_country_count_data.sort_values('Count', ascending=False),
                         x="EnglishName",
                         y="Count",
                         hover_data=["FrenchName", "Capital"])
fig_bar_country.show()

**Bar chart for the top 10 countries**

In [11]:
# Keep the ten rows with the highest counts (already in descending order).
top_10_countries = df_country_count_data.sort_values('Count', ascending=False).head(10)

# hover_data must name columns that exist in this frame: "FrenchName" and
# "Capital", not "GermanName"/"CapitalNameGerman".
fig_bar_country = px.bar(top_10_countries,
                         x="EnglishName",
                         y="Count",
                         hover_data=["FrenchName", "Capital"],
                         title="Top 10 Most Mentioned Countries")

# Readable axis titles, slanted tick labels so country names do not overlap,
# and no legend (there is only one series).
fig_bar_country.update_layout(
    xaxis_title="Country (English Name)",
    yaxis_title="Count",
    xaxis_tickangle=-45,
    showlegend=False,
)

# Show the interactive Plotly chart
fig_bar_country.show()

In [27]:
# Bar chart of every city, ordered from highest to lowest count.
fig_bar_city = px.bar(
    data_frame=df_city_count_data.sort_values(by='Count', ascending=False),
    x="City",
    y="Count",
)
fig_bar_city.show()

In [15]:
# Keep the ten cities with the highest counts. sort_values(...).head(10)
# already returns them in descending order, so the frame is plotted as-is
# without sorting it a second time.
top_10_cities = df_city_count_data.sort_values('Count', ascending=False).head(10)

# Title added so the figure stands on its own, mirroring the top-10 countries chart.
fig_bar_city = px.bar(top_10_cities,
                      x="City",
                      y="Count",
                      title="Top 10 Most Mentioned Cities")
fig_bar_city.show()