# Data Visualization Lab - Sebastiano Cassol - id: 229318
> 14 - 02 - 2022

## Exercise 1
(...)

## Exercise 2
(...)

## Exercise 3
(...)

## Exercise 4
### Worldwide Crop Consumption

In [None]:
import pandas as pd

# Load the worldwide crop dataset stored one directory above the notebook.
crop_consumption = pd.read_csv(
    filepath_or_buffer='../worldwide_crop_consumption.csv',
)

# Bare expression so the notebook renders the frame as the cell output.
crop_consumption

For the first infographic, we want to visualize `RICE` consumption in a single country over the years. So let's pick a country and prepare the data.
Let's say *Japan*.

In [None]:
# Keep only the rows whose LOCATION code is 'JPN'.
crop_japan = crop_consumption[crop_consumption['LOCATION'] == 'JPN']

# From those, keep the RICE rows and renumber them from 0.
# drop=True discards the old index directly, instead of first turning it into
# a column and then slicing that column off by position; it also returns a new
# frame rather than mutating a filtered slice in place.
rice_japan = crop_japan[crop_japan['SUBJECT'] == 'RICE'].reset_index(drop=True)

rice_japan

Now it's time to prepare the data for the bar plot.

In [None]:
# Rows of the Japan/rice table whose MEASURE is 'TONNE_HA'.
# .copy() makes this an independent frame, so the column assignments below
# write to it directly instead of to a slice of rice_japan.
rice_japan_tonneha = rice_japan[rice_japan['MEASURE'] == 'TONNE_HA'].copy()

# Make sure both plotted columns are numeric.
rice_japan_tonneha['TIME'] = pd.to_numeric(rice_japan_tonneha['TIME'])
rice_japan_tonneha['Value'] = pd.to_numeric(rice_japan_tonneha['Value'])

# drop years: 2024, 2025, 2026
# The mask is built from rice_japan_tonneha itself (already numeric and sharing
# its index), not from rice_japan, whose index and dtype may differ.
excluded_years = [2024, 2025, 2026]
rice_japan_tonneha = rice_japan_tonneha[
    ~rice_japan_tonneha['TIME'].isin(excluded_years)
]

# Remaining years as a plain list, for use by later cells.
years = rice_japan_tonneha['TIME'].tolist()

rice_japan_tonneha

Now let's prepare the data to visualize worldwide `RICE` consumption for a given year (e.g., 2022) on a map.

In [None]:
# Select the rows for year 2022, subject RICE and measure TONNE_HA.
is_rice_2022 = (
    (crop_consumption['TIME'] == 2022)
    & (crop_consumption['SUBJECT'] == 'RICE')
    & (crop_consumption['MEASURE'] == 'TONNE_HA')
)

# drop=True renumbers the rows from 0 without keeping the old index as a
# column, so no positional slicing is needed afterwards.
world_consumption_2022 = crop_consumption[is_rice_2022].reset_index(drop=True)

# Series.min()/max() skip missing values, unlike the builtin min()/max().
# On an empty selection they yield NaN instead of raising.
min_value = world_consumption_2022['Value'].min()
max_value = world_consumption_2022['Value'].max()

# Every remaining row has TIME == 2022, so both bounds are 2022 here.
min_year = world_consumption_2022['TIME'].min()
max_year = world_consumption_2022['TIME'].max()

world_consumption_2022

Now we have to deal with geodata.

In [None]:
import json
# Read the country outlines used later as the `geojson` argument of the map.
# The encoding is set explicitly so decoding does not depend on the platform's
# default locale (GeoJSON files are expected to be UTF-8).
with open('countries.geo.json', encoding='utf-8') as f:
    worldmap = json.load(f)

worldmap

In [None]:
import numpy as np
import seaborn as sns
import mapclassify as mc
import matplotlib.pyplot as plt
import plotly.express as px

# statistical figure: the Japan rice series as a bar chart (top) and a line chart (bottom)

fig, axes = plt.subplots(2, 1, figsize=(20, 15)) # define two subplots

plt.suptitle('How much rice is consumed in the world (and in Japan!)')

# Top panel: one bar per TIME value.
sns.barplot(ax=axes[0], data=rice_japan_tonneha, x='TIME', y="Value")
axes[0].set_title("Rice consumption (in tonne ha) in Japan")
axes[0].set_xlabel("Year", fontsize=15)
# The bar plot already labels each bar with its TIME value, so only restyle the
# ticks. Rebuilding the labels from min_year/max_year does not work: both are
# taken from a frame filtered to a single year, so that range is empty.
axes[0].tick_params(axis="x", rotation=70, labelsize=15)
axes[0].set_ylabel("Rice", fontsize=15)

# Bottom panel: the same series as a line.
axes[1].plot(rice_japan_tonneha['TIME'], rice_japan_tonneha['Value'], color='tab:red')
axes[1].set_xlabel("Years", fontsize=15)
axes[1].tick_params(axis="x", rotation=30, labelsize=10)
axes[1].set_ylabel("Rice consumption", color="tab:red", fontsize=15)
axes[1].tick_params(axis="y", rotation=90, labelsize=10)
axes[1].grid(alpha=.4)

fig.tight_layout()

# plt.show() matches the other plotting cell in this notebook.
plt.show()

In [None]:
# choropleth map: colour each country by its 'Value' in world_consumption_2022
fig2 = px.choropleth_mapbox(world_consumption_2022,
                            geojson=worldmap,
                            locations='LOCATION',
                            # GeoJSON property matched against the LOCATION codes
                            featureidkey='properties.sov_a3',
                            color='Value',
                            color_continuous_scale="Viridis",
                            range_color=(min_value, max_value),
                            # plotly expects {column name: display label}
                            labels={'LOCATION': 'Country', 'Value': 'Consumption'},
                            title="World rice consumption in 2022",
                            hover_data=['LOCATION', 'Value'],
                            center={'lon': 14, 'lat': 40},
                            mapbox_style='open-street-map',
                            zoom=2)

# NOTE(review): update_geos targets `geo` subplots; this figure is mapbox-based,
# so this call probably has no visible effect - confirm before relying on it.
fig2.update_geos(showcountries=False,
                 showcoastlines=False,
                 showland=False,
                 fitbounds='locations')
# Leave room at the top only, for the title.
fig2.update_layout(margin={"r":0, "t":40, "l":0, "b":0})
fig2.show()

## Exercise 5
Dimensionality reduction

In [None]:
import pandas as pd

# The file is tab-separated. The separator is written as the '\t' escape rather
# than a literal TAB character, which editors can silently turn into spaces.
setlur = pd.read_csv('../setlur.csv', sep='\t')

# Transpose so that the original columns become rows.
transposed_setlur = setlur.T

transposed_setlur

In [None]:
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE

# apply pca to setlur with 3 components
pca = PCA(n_components=3)

# Skip the first transposed row (presumably the non-numeric label column of the
# original file - TODO confirm). .copy() gives an independent frame, so the
# component columns added below are not written to a slice.
# NOTE: this cell is not idempotent - running it twice drops another row and
# feeds the pca-* columns back into PCA; re-run the loading cell first.
transposed_setlur = transposed_setlur.iloc[1:, :].copy()

pca_result = pca.fit_transform(transposed_setlur)

# Store the three component scores next to the data they were computed from.
transposed_setlur['pca-one'] = pca_result[:, 0]
transposed_setlur['pca-two'] = pca_result[:, 1]
transposed_setlur['pca-three'] = pca_result[:, 2]

In [None]:
# Scatter plot of the first two principal components.
plt.figure(figsize=(16, 10))

# The pca-* columns live on transposed_setlur, not on setlur, so that is the
# frame to plot. Colour by a 'y' column only when one actually exists; no code
# in view creates it.
hue_column = "y" if "y" in transposed_setlur.columns else None

sns.scatterplot(
    x="pca-one", y="pca-two",
    hue=hue_column,
    palette=sns.color_palette("hls", 10) if hue_column else None,
    data=transposed_setlur,
    legend="full" if hue_column else False,
    alpha=0.3
)

plt.show()

## Exercise 6
(...)