In [None]:
import numpy as np
import pandas as pd
import geopandas as gpd

import matplotlib.pyplot as plt
import matplotlib.dates as mdates

from shapely.geometry import Point
from shapely.geometry import mapping

In [None]:
# Names of the score columns that later cells aggregate and plot
columns = [
    'Agriculture',
    'Water',
    'Ecosystem',
    'Economy',
    'Society',
    'General',
]

# Tweets workbook that carries the score columns listed above
df = pd.read_excel('tweets/tweets_11578_scores.xlsx')

In [None]:
# Derive calendar fields from the tweet date (time of day is dropped by normalize)

tweet_day = pd.to_datetime(df['date']).dt.normalize()
df['date'] = tweet_day
df['month'] = tweet_day.dt.month
df['week'] = tweet_day.dt.isocalendar().week

# Monday of the week each tweet falls in: step back by the weekday number (Monday = 0)
df['week_start'] = tweet_day - pd.to_timedelta(tweet_day.dt.dayofweek, unit='D')

# Sum every score column per week; week_start stays a regular column
df_weekly = df.groupby('week_start', as_index=False)[columns].sum()

# Normalized scores: each week's sum as a percentage of that column's total over all weeks

grand_total = df_weekly[columns].sum()
df_weekly_perc = df_weekly[columns].div(grand_total).mul(100)
df_weekly_perc['week_start'] = df_weekly['week_start']

In [None]:
# Plotting category scores across weeks from May to August
# (only weeks whose start date falls in months 5-8 are kept)

df_weekly_perc = df_weekly_perc[df_weekly_perc['week_start'].dt.month.isin([5, 6, 7, 8])]

# One fixed colour per score category
category_colors = {
    'Agriculture': '#d95f0e',
    'Water': '#3182bd',
    'Ecosystem': '#31a354',
    'Economy': '#756bb1',
    'Society': '#dd1c77',
    'General': '#636363'
}

fig, ax = plt.subplots(figsize=(10, 6))

for category, color in category_colors.items():
    ax.plot(df_weekly_perc['week_start'], df_weekly_perc[category], marker='o', color=color, label=category)

ax.set_xlabel('Week')
ax.set_ylabel('Normalized score')

# week_start is always a Monday (date minus its weekday offset), while
# WeekdayLocator ticks on Tuesdays unless told otherwise. Pin the ticks to
# Monday so every tick label sits exactly on a data marker.
ax.xaxis.set_major_locator(mdates.WeekdayLocator(byweekday=mdates.MO, interval=1))
ax.xaxis.set_major_formatter(mdates.DateFormatter('%b %d'))
ax.tick_params(axis='x', rotation=45)

ax.grid(True, linestyle='--', alpha=0.6)
ax.legend()
fig.tight_layout()
plt.show()

In [None]:
# Load the two tweet workbooks that are merged below
df_22 = pd.read_excel('tweets/tweets_11578_scores.xlsx')
df_20_21 = pd.read_excel('tweets/tweets_nuts_20_21.xlsx')

# Combine three years of tweets into one single df.
# Each workbook is read with its own 0..n-1 row index, so a plain concat would
# leave duplicated row labels in the result; ignore_index=True renumbers the
# rows so every label in df_combined is unique.

df_combined = pd.concat([df_22, df_20_21], ignore_index=True)

In [None]:
# Aggregate the count of location extracted tweets by month

# Unparseable dates become NaT instead of raising
tweet_dates = pd.to_datetime(df_combined['date'], errors='coerce')
df_combined['date'] = tweet_dates
df_combined['month_year'] = tweet_dates.dt.to_period('M')

# Rows per calendar month in chronological order (missing months are left out by value_counts)
month_counts = (
    df_combined['month_year']
    .value_counts()
    .sort_index()
)

In [None]:
# Load the SPI table; low_memory=False makes pandas infer each column's dtype
# from the whole file rather than chunk by chunk
spi = pd.read_csv(
    'eobs/spi3.csv',
    low_memory=False,
)

In [None]:
# Split the point column containing latitude and longitude as a string in the format (lat, lon)

spi[['lat', 'lon']] = (
    spi['point']
    .str.strip('()')
    .str.split(', ', expand=True)
    .astype(float)
)

# Create a geo df of Point geometries using the longitude and latitude columns.
# points_from_xy builds all points in one vectorized call (x = lon, y = lat), and
# the CRS (EPSG:4326) is attached at construction instead of through a separate
# in-place set_crs call whose result was also reassigned.

geometry = gpd.points_from_xy(spi['lon'], spi['lat'])
spi_gdf = gpd.GeoDataFrame(spi, geometry=geometry, crs='EPSG:4326')

In [None]:
# Read the NUTS shapefile and keep only the features whose CNTR_CODE is 'IT'
gdf = gpd.read_file('tweets/NUTS_RG_60M_2021_4326.shp', encoding='latin1')
is_italy = gdf['CNTR_CODE'].eq('IT')
gdf_italy = gdf[is_italy]

In [None]:
# Check the shapefile: quick visual sanity check of the features kept for CNTR_CODE == 'IT'

gdf_italy.plot()

In [None]:
# Perform a spatial join and keep only points within the Italian boundaries

spi_italy = gpd.sjoin(spi_gdf, gdf_italy, how='inner', predicate='within')

# sjoin returns one row per (point, polygon) match. gdf_italy is filtered only by
# country code, so a point lying inside more than one of its polygons would be
# repeated and later counted several times.
# NOTE(review): presumably the NUTS file holds nested levels that overlap - confirm.
# Keep a single row per original grid point (the left index identifies the point).
spi_italy = spi_italy[~spi_italy.index.duplicated(keep='first')]

In [None]:
# Check the start month: show the label of the column at position 841, which is
# the first column of the 841:877 slice used for the low-SPI count in a later cell

spi_italy.columns[841]

In [None]:
# Coerce every column of spi_italy to numeric; values that cannot be parsed become NaN.
# NOTE(review): this is applied to the whole frame, so non-SPI columns are
# coerced as well - confirm they are not needed afterwards.
spi_italy = spi_italy.apply(lambda col: pd.to_numeric(col, errors='coerce'))

# Values strictly below this threshold are treated as low SPI
threshold = -2

# Columns at positions 841..876 (36 columns).
# NOTE(review): presumably the SPI columns for the period being plotted - confirm.
selected_columns = spi_italy.columns[841:877]

# For each selected column, count the rows (gridded cells) whose value is below the threshold
count_below_threshold = spi_italy[selected_columns].lt(threshold).sum()

# One row per selected column, with a single 'count' column
low_spi = count_below_threshold.to_frame(name='count')

In [None]:
# Plotting the count of location extracted tweets and gridded cells with low SPI

fig, ax1 = plt.subplots(figsize=(12, 6))

# Bars: tweets per month. pandas draws bar k at integer x position k.
month_counts.plot(kind='bar', ax=ax1, color='skyblue', edgecolor='black', width=0.8, label='Location Extracted Tweets')
ax1.set_xlabel('Month-Year')
ax1.set_ylabel('Count of Location Extracted Tweets')
ax1.tick_params(axis='x', rotation=45)

# Line: low-SPI cell counts on a second y-axis that shares the x-axis with the bars.
# The points are placed at explicit integer positions instead of passing the string
# index of low_spi as x. String x values become a categorical axis on the shared
# x-axis, which leaves alignment to implicit category order and may replace the
# month tick labels set by the bar plot.
# NOTE(review): assumes month_counts and low_spi cover the same months in the same order - confirm.
ax2 = ax1.twinx()
spi_positions = np.arange(len(low_spi))
ax2.plot(spi_positions, low_spi['count'], color='orange', marker='o', linestyle='-', label='Gridded Cells with Low SPI')
ax2.set_ylabel('Count of Gridded Cells with Low SPI3')

# Merge the handles of both axes into a single legend
lines1, labels1 = ax1.get_legend_handles_labels()
lines2, labels2 = ax2.get_legend_handles_labels()
ax1.legend(lines1 + lines2, labels1 + labels2, loc='upper left')

fig.tight_layout()

plt.show()