In [1]:
import pandas as pd 
import geopandas as gpd
from shapely.geometry import Polygon
import ast
import altair as alt


# Read the processed per-municipality table. The first CSV column is consumed
# as the index and then immediately turned back into an ordinary column by
# reset_index(), leaving a fresh 0..n-1 RangeIndex on the frame.
df = (
    pd.read_csv(
        "../data/processed/processed_municip_kWh_with_polygons.csv",
        encoding="utf-8",
        index_col=0,
    )
    .reset_index()
)
df.head()

Unnamed: 0,Province,Municipality,Month,South-facing with vertical (90 degrees) tilt,South-facing with latitude tilt,South-facing with tilt=latitude+15 degrees,South-facing with tilt=latitude-15 degrees,2-axis tracking,Horizontal (0 degree),address,latitude,longitude,polygons
0,Alberta,Acadia Valley,Annual,3.85,4.95,4.71,4.94,7.09,3.73,"Acadia Valley, Alberta",51.158676,-110.210332,"POLYGON ((-110.2141432 51.1587175, -110.211333..."
1,Alberta,Acme,Annual,3.72,4.76,4.53,4.76,6.77,3.62,"Acme, Alberta",51.497782,-113.513721,"POLYGON ((-113.5143844 51.4985257, -113.514325..."
2,Alberta,Airdrie,Annual,3.66,4.7,4.47,4.7,6.67,3.61,"Airdrie, Alberta",51.28597,-114.01062,"POLYGON ((-114.0107674 51.2860953, -114.010766..."
3,Alberta,Alberta Beach,Annual,3.52,4.44,4.21,4.46,6.35,3.42,"Alberta Beach, Alberta",53.674589,-114.357227,"POLYGON ((-114.3570561 53.6745442, -114.356754..."
4,Alberta,Alder Flats,Annual,3.47,4.41,4.18,4.42,6.28,3.44,"Alder Flats, Alberta",52.931756,-114.959071,"POLYGON ((-114.9682284 52.9313586, -114.967447..."


In [2]:
import re
# Municipality names of every Quebec row, in row order.
quebec = df.loc[df['Province'] == "Quebec", 'Municipality'].to_list()

# A name counts as "plain" when, after dropping apostrophes, hyphens and
# spaces, it consists solely of the ASCII letters a-z / A-Z.
pattern = re.compile(r'^[a-zA-Z]+$')

# Collect, in their original order, the names that are NOT plain. The order
# matters: the correction cell pairs this list positionally with its fixes.
non_english_names = [
    name
    for name in quebec
    if pattern.match(re.sub(r'[\'\-\ ]', '', name)) is None
]

# Display the list of names containing non-English characters
print(non_english_names)

['Baie-TrinitÃ©', 'BÃ©arn', 'BÃ©cancour', 'BÃ©gin', 'ChÃ¢teauguay', 'ChÃ¢teau-Richer', 'DÃ©gelis', 'Ã\x89vain', 'GaspÃ©', 'Grande-RiviÃ¨re', 'Grande-VallÃ©e', 'HÃ©bertville', 'JonquiÃ¨re', 'La DorÃ©', 'La PocatiÃ¨re', 'La TabatiÃ¨re', 'Lac-CarrÃ©', 'Lac-MÃ©gantic', 'LaterriÃ¨re', 'Lebel-sur-QuÃ©villon', 'Les Ã\x89boulements', 'Les Ã\x89troits', 'Les MÃ©chins', 'LÃ©vis', 'LotbiniÃ¨re', 'MaskinongÃ©', 'MatapÃ©dia', 'MÃ©tabetchouan--Lac-Ã\xa0-la-Croix', 'MÃ©tis-sur-Mer', 'MontrÃ©al', 'NÃ©dÃ©lec', 'NormÃ©tal', 'OujÃ©-Bougoumou', 'PaspÃ©biac', 'PercÃ©', 'PÃ©ribonka', 'Petite-RiviÃ¨re', 'PohÃ©nÃ©gamook', 'Pointe-Ã\xa0-la-FrÃ©gate', 'Pointe-Ã\xa0-la-Garde', 'PrÃ©vost', 'QuÃ©bec', 'RÃ©migny', 'RiviÃ¨re-au-Renard', 'RiviÃ¨re-Bleue', 'RiviÃ¨re-du-Loup', 'RiviÃ¨re-Ouelle', 'RiviÃ¨re-Portneuf', 'SacrÃ©-Coeur-Saguenay', 'Saint-AndrÃ©', 'Saint-AndrÃ©-Avellin', 'Saint-AndrÃ©-Est', 'Saint-BarnabÃ©-Nord', 'Saint-CÃ©lestin', 'Saint-CÃ©saire', 'Saint-CÃ´me', 'Saint-CÃ´me--LiniÃ¨re', 'Sainte-AdÃ¨le', 'Sai

In [3]:
# Hand-corrected spellings for the Quebec municipality names that show up
# mis-encoded in the loaded data. This list is POSITIONAL: entry i is the fix
# for non_english_names[i], so both lists must keep the same order and length.
corrected_quebec = ['Baie-Trinité', 'Béarn', 'Bécancour', 'Bégin', 'Châteauguay', 'Château-Richer', 'Dégelis', 'Évain', 'Gaspé', 'Grande-Rivière', 'Grande-Vallée', 'Hébertville', 'Jonquière', 'La Doré', 'La Pocatière', 'La Tabatière', 'Lac-Carré', 'Lac-Mégantic', 'Laterrière', 'Lebel-sur-Quévillon', 'Les Éboulements', 'Les Étroits', 'Les Méchins', 'Lévis', 'Lotbinière', 'Maskinongé', 'Matapédia', 'Métabetchouan–Lac-à-la-Croix', 'Métis-sur-Mer', 'Montréal', 'Nédélec', 'Normétal', 'Oujé-Bougoumou', 'Paspébiac', 'Percé', 'Péribonka', 'Petite-Rivière', 'Pohénégamook', 'Pointe-à-la-Frégate', 'Pointe-à-la-Garde', 'Prévost', 'Québec', 'Rémigny', 'Rivière-au-Renard', 'Rivière-Bleue', 'Rivière-du-Loup', 'Rivière-Ouelle', 'Rivière-Portneuf', 'Sacré-Coeur-Saguenay', 'Saint-André', 'Saint-André-Avellin', 'Saint-André-Est', 'Saint-Barnabé-Nord', 'Saint-Célestin', 'Saint-Césaire', 'Saint-Côme', 'Saint-Côme–Linière', 'Sainte-Adèle', 'Sainte-Agathe-de-Lotbinière', 'Sainte-Angèle-de-Mérici', 'Sainte-Anne-de-Beaupré', 'Sainte-Anne-de-la-Pérade', 'Sainte-Cécile-de-Masham', 'Sainte-Clotilde-de-Châteauguay', 'Saint-Édouard', "Sainte-Émélie-de-l'Énergie", 'Sainte-Félicité', 'Sainte-Hélène-de-Bagot', 'Sainte-Hélène-de-Kamouraska', 'Saint-Élie', 'Saint-Elzéar', "Sainte-Perpétue-de-L'Islet", 'Sainte-Pétronille', 'Saint-Éphrem-de-Beauce', 'Saint-Épiphane', 'Sainte-Thècle', 'Sainte-Véronique', 'Saint-Félicien', 'Saint-Félix-de-Kingsey', 'Saint-Félix-de-Valois', 'Saint-Fidèle', 'Saint-Gédéon', 'Saint-Grégoire', 'Saint-Honoré', 'Saint-Honoré-de-Témiscouata', 'Saint-Hubert-de-Rivière-du-Loup', 'Saint-Jérôme', 'Saint-Just-de-Bretenières', 'Saint-Lambert-de-Lévis', "Saint-Léonard-d'Aston", 'Saint-Marc-des-Carrières', 'Saint-Nicéphore', 'Saint-Noël', 'Saint-Pacôme', 'Saint-Philémon', 'Saint-Philippe-de-Néri', 'Saint-Raphaël', 'Saint-Régis', 'Saint-Rémi', 'Saint-Siméon', 'Saint-Théophile', 'Sept-Îles', 'Témiscaming', 'Tête-à-la-Baleine', 'Trois-Rivières', 'Vallée-Jonction', 'Verchères']

# zip() silently stops at the shorter input, which would leave trailing names
# uncorrected (and hide a mismatch between the two lists) without any error.
# Fail loudly instead of building a partial mapping.
if len(non_english_names) != len(corrected_quebec):
    raise ValueError(
        f"Expected {len(corrected_quebec)} flagged Quebec names but found "
        f"{len(non_english_names)}; the positional name mapping would be misaligned."
    )
name_correction = dict(zip(non_english_names, corrected_quebec))

# Northwest Territories names whose keys contain literal '?' placeholders, so
# they are mapped explicitly rather than paired positionally.
name_correction_NT = {
    "Behchok??": "Behchoko",
    "DÃ©l?ne": "Délįne",
    "GamÃ¨tÃ¬": "Gamètì",
    # The single '?' stands for the leading 'Ł' of the community name.
    "?utselk'e": "Łutselk’e",
    "WekweÃ¨tÃ¬": "Wekweètì",
    "What??": "Whatì"
}

name_correction.update(name_correction_NT)


def correct_names(row):
    """Return the corrected municipality name for a single row.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row)
        Must provide a 'Municipality' entry.

    Returns
    -------
    The spelling stored in ``name_correction`` for ``row['Municipality']``,
    or the original value unchanged when no correction is registered.
    """
    return name_correction.get(row['Municipality'], row['Municipality'])


# Apply the lookup column-wise: same result as a row-wise
# df.apply(correct_names, axis=1), without building a Series per row.
df['Municipality'] = df['Municipality'].map(lambda name: name_correction.get(name, name))

df[['Province', 'Municipality']].query('Province == "Quebec"')

Unnamed: 0,Province,Municipality
2512,Quebec,Abercorn
2513,Quebec,Acton Vale
2514,Quebec,Akulivik
2515,Quebec,Albanel
2516,Quebec,Alma
...,...,...
3058,Quebec,Winneway
3059,Quebec,Woburn
3060,Quebec,Wotton
3061,Quebec,Yamachiche


In [4]:
# Spot-check the municipality names of the Northwest Territories rows.
df.loc[df['Province'] == "Northwest Territories", ['Province', 'Municipality']]

Unnamed: 0,Province,Municipality
1439,Northwest Territories,Aklavik
1440,Northwest Territories,Behchoko
1441,Northwest Territories,Colville Lake
1442,Northwest Territories,Délįne
1443,Northwest Territories,Detah
1444,Northwest Territories,Enterprise
1445,Northwest Territories,Fort Good Hope
1446,Northwest Territories,Fort Liard
1447,Northwest Territories,Fort McPherson
1448,Northwest Territories,Fort Providence


In [7]:
# Preview the first five rows of the working frame.
# NOTE(review): the saved output of this cell lists a 'geometry' column, yet the
# only code in view that assigns df['geometry'] is the commented-out polygon
# conversion cell -- the output presumably comes from an earlier run; confirm
# with Restart & Run All.
df.head(n=5)

Unnamed: 0,Province,Municipality,Month,South-facing with vertical (90 degrees) tilt,South-facing with latitude tilt,South-facing with tilt=latitude+15 degrees,South-facing with tilt=latitude-15 degrees,2-axis tracking,Horizontal (0 degree),address,latitude,longitude,polygons,geometry
0,Alberta,Acadia Valley,Annual,3.85,4.95,4.71,4.94,7.09,3.73,"Acadia Valley, Alberta",51.158676,-110.210332,"POLYGON ((-110.2141432 51.1587175, -110.211333...","POLYGON ((-110.2141432 51.1587175, -110.211333..."
1,Alberta,Acme,Annual,3.72,4.76,4.53,4.76,6.77,3.62,"Acme, Alberta",51.497782,-113.513721,"POLYGON ((-113.5143844 51.4985257, -113.514325...","POLYGON ((-113.5143844 51.4985257, -113.514325..."
2,Alberta,Airdrie,Annual,3.66,4.7,4.47,4.7,6.67,3.61,"Airdrie, Alberta",51.28597,-114.01062,"POLYGON ((-114.0107674 51.2860953, -114.010766...","POLYGON ((-114.0107674 51.2860953, -114.010766..."
3,Alberta,Alberta Beach,Annual,3.52,4.44,4.21,4.46,6.35,3.42,"Alberta Beach, Alberta",53.674589,-114.357227,"POLYGON ((-114.3570561 53.6745442, -114.356754...","POLYGON ((-114.3570561 53.6745442, -114.356754..."
4,Alberta,Alder Flats,Annual,3.47,4.41,4.18,4.42,6.28,3.44,"Alder Flats, Alberta",52.931756,-114.959071,"POLYGON ((-114.9682284 52.9313586, -114.967447...","POLYGON ((-114.9682284 52.9313586, -114.967447..."


In [9]:
from shapely.geometry import Point
# Build one point geometry per row from the longitude (x) and latitude (y)
# columns using geopandas' vectorised constructor.
geometry = gpd.points_from_xy(df['longitude'], df['latitude'])

# Give the CRS as an authority string. The {'init': 'epsg:4326'} dict form is
# deprecated in pyproj/geopandas and is the likely source of the warning
# fragment in this cell's saved output.
crs = "EPSG:4326"

# Attach the point geometries to the attribute table; this sets the active
# 'geometry' column of the resulting GeoDataFrame.
gdf = gpd.GeoDataFrame(df, crs=crs, geometry=geometry)
gdf.head()

  in_crs_string = _prepare_from_proj_string(in_crs_string)


Unnamed: 0,Province,Municipality,Month,South-facing with vertical (90 degrees) tilt,South-facing with latitude tilt,South-facing with tilt=latitude+15 degrees,South-facing with tilt=latitude-15 degrees,2-axis tracking,Horizontal (0 degree),address,latitude,longitude,polygons,geometry
0,Alberta,Acadia Valley,Annual,3.85,4.95,4.71,4.94,7.09,3.73,"Acadia Valley, Alberta",51.158676,-110.210332,"POLYGON ((-110.2141432 51.1587175, -110.211333...",POINT (-110.21033 51.15868)
1,Alberta,Acme,Annual,3.72,4.76,4.53,4.76,6.77,3.62,"Acme, Alberta",51.497782,-113.513721,"POLYGON ((-113.5143844 51.4985257, -113.514325...",POINT (-113.51372 51.49778)
2,Alberta,Airdrie,Annual,3.66,4.7,4.47,4.7,6.67,3.61,"Airdrie, Alberta",51.28597,-114.01062,"POLYGON ((-114.0107674 51.2860953, -114.010766...",POINT (-114.01062 51.28597)
3,Alberta,Alberta Beach,Annual,3.52,4.44,4.21,4.46,6.35,3.42,"Alberta Beach, Alberta",53.674589,-114.357227,"POLYGON ((-114.3570561 53.6745442, -114.356754...",POINT (-114.35723 53.67459)
4,Alberta,Alder Flats,Annual,3.47,4.41,4.18,4.42,6.28,3.44,"Alder Flats, Alberta",52.931756,-114.959071,"POLYGON ((-114.9682284 52.9313586, -114.967447...",POINT (-114.95907 52.93176)


In [None]:
# Destination of the GeoJSON export -- update with your correct path.
json_file_path = '../data/processed/kWh_poly.json'

# Write the GeoDataFrame to disk using the GeoJSON driver.
gdf.to_file(filename=json_file_path, driver='GeoJSON')

In [6]:
# NOTE(review): this whole cell is commented out and does not run. It is an
# alternative export that parses the WKT text in df['polygons'] into shapely
# geometries, stores them in df['geometry'], drops rows that failed to parse,
# and writes the result to '../data/processed/kWh_poly.json' -- the same path
# the active point-geometry export uses, so enabling both would make one
# overwrite the other.

# import pandas as pd
# import geopandas as gpd
# from shapely import wkt
# from shapely.geometry import Polygon

# # Adjust the convert_polygons function to skip non-string polygon data
# def convert_polygons(polygon_str):
#     if pd.isna(polygon_str) or not isinstance(polygon_str, str):
#         return None  # Skip this row if the value is NaN or not a string
#     try:
#         # Convert the string to a shapely Polygon object using WKT
#         return wkt.loads(polygon_str)
#     except Exception as e:
#         # Print the error and return None for this row
#         print(f"Error converting polygon: {e}")
#         return None

# # Apply the conversion function to the 'polygons' column
# df['geometry'] = df['polygons'].apply(convert_polygons)

# # Drop rows where polygons conversion failed (if any)
# df = df.dropna(subset=['geometry'])

# # Create a GeoDataFrame
# gdf = gpd.GeoDataFrame(df, geometry='geometry')

# # Save to GeoJSON
# json_file_path = '../data/processed/kWh_poly.json'  # Update with your correct path
# gdf.to_file(json_file_path, driver='GeoJSON')