In [11]:
# import geojson
import os
import json
import shutil
import numpy as np
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
from shapely.geometry import Point, Polygon, MultiPolygon

In [2]:
# Load the raw 2020 Italy point dataset from CSV into a pandas DataFrame.
df = pd.read_csv('ita_general_2020.csv')

In [3]:
# Keep only the coordinate columns. errors='ignore' makes this cell safe to
# re-run: without it a second execution raises KeyError because the column
# has already been removed from `df`.
df = df.drop(columns=['ita_general_2020'], errors='ignore')
print(df.head())

   longitude   latitude
0   6.215694  48.000139
1   6.216250  48.000139
2   6.216528  48.000139
3   6.243750  48.000139
4   6.244028  48.000139


In [4]:
# Re-express every point as an offset from the dataset's minimum longitude
# and latitude, so the smallest coordinate on each axis maps to 0.
min_lon = df['longitude'].min()
min_lat = df['latitude'].min()

# Plain column arithmetic is vectorised; it yields the same values as a
# row-wise apply but without a Python-level call per row.
df['x'] = df['longitude'] - min_lon
df['y'] = df['latitude'] - min_lat

print(df.head())

   longitude   latitude         x          y
0   6.215694  48.000139  0.215833  12.999722
1   6.216250  48.000139  0.216389  12.999722
2   6.216528  48.000139  0.216667  12.999722
3   6.243750  48.000139  0.243889  12.999722
4   6.244028  48.000139  0.244167  12.999722


In [None]:
# Write the full frame (coordinates plus x/y offsets) to CSV, without the index column.
df.to_csv('italy_xy.csv', index=False)

In [5]:
# Read the GeoJSON explicitly as UTF-8: the file carries non-ASCII names
# (e.g. "Vallée d'Aoste"), and the platform default encoding would otherwise
# decide how they are decoded, breaking later name lookups on some systems.
with open('region_shape.geojson', encoding='utf-8') as f:
    data = json.load(f)

# One row per GeoJSON feature; the feature properties become columns.
polygons = gpd.GeoDataFrame.from_features(data['features'])

In [54]:
def get_city_dataset(city_name, region = False, region_name = None):
    """Export the points of the global ``df`` that lie inside one province.

    The province outline is looked up in the global ``polygons`` frame by its
    ``prov_name``. Rows of ``df`` whose (longitude, latitude) fall inside that
    outline are written to a CSV, with ``x``/``y`` columns holding each
    point's offset from the province's own minimum longitude/latitude.
    The global ``df`` itself is not modified.

    Args:
        city_name: value of ``prov_name`` selecting the province.
        region: when true, prefix the output file name with ``region_name``.
        region_name: name of the enclosing region; may be left as ``None``
            for a stand-alone province export.

    Raises:
        IndexError: if no row of ``polygons`` has ``prov_name == city_name``.

    Output file: ``aosta_xy.csv`` when ``region_name`` contains the Valle
    d'Aosta name, ``<region_name>_<city_name>_xy.csv`` when ``region`` is true
    and a region name was given, otherwise ``<city_name>_xy.csv``.
    """
    matches = polygons[polygons['prov_name'] == city_name]
    if matches.empty:
        # Same exception type `.iloc[0]` would raise, but with a usable message.
        raise IndexError(f"no province named {city_name!r} in polygons")

    # `contains` works on Polygon and MultiPolygon alike, so the geometry is
    # used as-is instead of being re-wrapped in a MultiPolygon.
    city_shape = matches['geometry'].iloc[0]

    # Cheap vectorised pre-filter: a point inside the shape is necessarily
    # inside its bounding box, so the exact test only runs on candidates.
    min_lon, min_lat, max_lon, max_lat = city_shape.bounds
    in_bbox = (
        df['longitude'].between(min_lon, max_lon)
        & df['latitude'].between(min_lat, max_lat)
    )
    candidates = df.loc[in_bbox]

    # Exact point-in-polygon test, kept in a local mask rather than written
    # into the shared `df` as a temporary column.
    in_city = np.array(
        [
            city_shape.contains(Point(lon, lat))
            for lon, lat in zip(candidates['longitude'], candidates['latitude'])
        ],
        dtype=bool,
    )

    city_df = (
        candidates.loc[in_city]
        .drop(columns=['in_city', 'ita_general_2020'], errors='ignore')
        .copy()
    )

    # Offsets relative to this province's own minimum; the minimum is taken
    # once instead of once per row.
    city_df['x'] = city_df['longitude'] - city_df['longitude'].min()
    city_df['y'] = city_df['latitude'] - city_df['latitude'].min()

    # NOTE(review): names containing '/' end up verbatim in the file name and
    # would be read as a directory separator; only the Valle d'Aosta name is
    # special-cased here. Confirm whether other names need the same handling.
    if region_name is not None and 'Valle d\'Aosta/Vallée d\'Aoste' in region_name:
        out_name = 'aosta_xy.csv'
    elif region and region_name is not None:
        out_name = region_name+"_"+city_name+'_xy.csv'
    else:
        out_name = city_name+'_xy.csv'

    city_df.to_csv(out_name, index=False)
    

def get_region_dataset(region_name):
    """Export one CSV per province of the region called ``region_name``.

    The distinct ``prov_name`` values of the rows in the global ``polygons``
    frame whose ``reg_name`` equals ``region_name`` are printed, then each one
    is handed to ``get_city_dataset`` in region mode.
    """
    belongs_to_region = polygons['reg_name'] == region_name
    province_names = polygons.loc[belongs_to_region, 'prov_name'].unique()
    print(province_names)

    for province in province_names:
        get_city_dataset(province, True, region_name)

    # Disabled post-processing (was present only as commented-out code):
    #   1. merge the per-province '<region_name>*' CSVs into a single
    #      '<region_name>_xy.csv' ('aosta_xy.csv' for Valle d'Aosta) and
    #      delete the per-province files;
    #   2. move the resulting '*_xy.csv' files to './scp_project/src/resources/'.

In [None]:
# Stand-alone export for the province named 'Roma' (no region arguments given).
get_city_dataset('Roma')

In [55]:
# Export every province of the Valle d'Aosta region; the printed array lists the provinces found.
get_region_dataset('Valle d\'Aosta/Vallée d\'Aoste')

["Valle d'Aosta/Vallée d'Aoste"]
