In [2]:
import pandas as pd
import os

In [4]:
cpi = pd.read_csv("../data/processed/CPI.csv")

In [6]:
corruption_df = cpi[cpi['Indicator'] == 'Corruption Perceptions Index Score']


In [8]:
europe_corruption_2023 = corruption_df[['Economy Name', 'Economy ISO3', '2023']]


In [10]:
europe_corruption_2023.head()

Unnamed: 0,Economy Name,Economy ISO3,2023
2,Albania,ALB,37.0
6,Austria,AUT,71.0
10,Belgium,BEL,73.0
14,Bulgaria,BGR,45.0
18,Bosnia and Herzegovina,BIH,35.0


In [12]:
import plotly.express as px

# Sort the DataFrame by corruption score
europe_corruption_sorted = europe_corruption_2023.sort_values(by='2023', ascending=False)

# Create a bar chart
fig = px.bar(
    europe_corruption_sorted,
    x='2023',
    y='Economy Name',
    orientation='h',
    title='Corruption Perceptions Index in Europe (2023)',
    labels={'2023': 'CPI Score', 'Economy Name': 'Country'},
    color='2023',
    color_continuous_scale='Viridis'
)

# Update layout for better appearance
fig.update_layout(
    yaxis=dict(autorange="reversed"),  # Highest scores on top
    coloraxis_colorbar=dict(title="CPI Score")
)

fig.show()


In [14]:
import plotly.express as px

# Create a choropleth map with custom figure size
fig = px.choropleth(
    europe_corruption_2023,
    locations='Economy ISO3',
    color='2023',
    hover_name='Economy Name',
    color_continuous_scale='Viridis',
    range_color=(europe_corruption_2023['2023'].min(), europe_corruption_2023['2023'].max()),
    title='Corruption Perceptions in Europe (2023)',
    labels={'2023': 'CPI Score'},
    projection='natural earth',
    width=1000,   # <-- Set the width here
    height=800    # <-- Set the height here
)

# Update layout for better appearance
fig.update_layout(
    geo=dict(
        showcountries=True,
        showcoastlines=True,
        showland=True,
        countrycolor="lightgrey"
    ),
    coloraxis_colorbar=dict(title="CPI Score")
)

fig.show()



In [16]:
# Define thresholds (adjust based on CPI scale)
def categorize_cpi(score):
    if score >= 75:
        return 'Low Corruption'
    elif score >= 50:
        return 'Medium Corruption'
    else:
        return 'High Corruption'

europe_corruption_2023['Corruption Level'] = europe_corruption_2023['2023'].apply(categorize_cpi)

# Update the choropleth to use categories
fig = px.choropleth(
    europe_corruption_2023,
    locations='Economy ISO3',
    color='Corruption Level',
    hover_name='Economy Name',
    color_discrete_map={
        'Low Corruption': 'green',
        'Medium Corruption': 'orange',
        'High Corruption': 'red'
    },
    title='Corruption Levels in Europe (2023)',
    labels={'Corruption Level': 'Corruption Level'},
    projection='natural earth'
)

fig.show()




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



### Time Frame 

In [17]:
# Melt the DataFrame to have years as a variable
melted_df = corruption_df.melt(
    id_vars=['Economy Name', 'Economy ISO3'],
    value_vars=[str(year) for year in range(2012, 2024)],
    var_name='Year',
    value_name='CPI Score'
)

# Remove missing values
melted_df = melted_df.dropna(subset=['CPI Score'])

# Create an animated choropleth map
fig = px.choropleth(
    melted_df,
    locations='Economy ISO3',
    color='CPI Score',
    hover_name='Economy Name',
    animation_frame='Year',
    color_continuous_scale='Viridis',
    range_color=(melted_df['CPI Score'].min(), melted_df['CPI Score'].max()),
    title='Corruption Perceptions in Europe (2012-2023)',
    labels={'CPI Score': 'CPI Score'},
    projection='natural earth'
)

fig.update_layout(
    geo=dict(
        showcountries=True,
        showcoastlines=True,
        showland=True,
        countrycolor="lightgrey"
    ),
    coloraxis_colorbar=dict(title="CPI Score")
)

fig.show()
