# Import Statements

In [None]:
import os
import pickle

import numpy as np
import pandas as pd
import geopandas as gpd

from shapely.ops import cascaded_union

import matplotlib.pyplot as plt
import geoplot
import seaborn as sns
import plotly.graph_objects as go

# Load Data

In [None]:
# Read the district employment table and normalise state names in one pass:
# three entries drop a trailing space, and 'Orissa' becomes 'Odisha'.
# A flat dict passed to DataFrame.replace is matched against whole cell
# values in every column, not only the state column.
district_emp = pd.read_csv('district_emp_data_full.csv').replace({
    'Jharkhand ': 'Jharkhand',
    'Maharashtra ': 'Maharashtra',
    'Orissa': 'Odisha',
    'Telangana ': 'Telangana',
})
district_emp.head()

In [None]:
# Data source:
# https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/TDEK8O&version=1.0
pai_coal_mines = pd.read_csv('pai_coal_mines.csv')
# Normalise state names: drop trailing spaces and rename 'Orissa' to 'Odisha'.
# The mapping is applied to whole cell values across all columns.
pai_coal_mines = pai_coal_mines.replace(
    to_replace={
        'Jharkhand ': 'Jharkhand',
        'Maharashtra ': 'Maharashtra',
        'Orissa': 'Odisha',
        'Telangana ': 'Telangana',
    }
)
pai_coal_mines.head()

In [None]:
# Global Coal Plant Tracker (January 2022 file): keep rows for India that have
# both coordinates and status 'operating', then keep the first row for each
# 'ParentID' and narrow to the columns used downstream.
plants = (
    pd.read_csv("Global-Coal-Plant-Tracker-Jan-2022.csv", encoding="ISO-8859-1")
    .loc[lambda df: df['Country'] == 'India']
    .loc[lambda df: df['Latitude'].notna()]
    .loc[lambda df: df['Longitude'].notna()]
    .loc[lambda df: df['Status'].isin(['operating'])]
    .drop_duplicates('ParentID')
    .loc[:, ['Plant', 'Subnational unit (province, state)', 'Status', 'Latitude', 'Longitude',
             'Capacity (MW)', 'Annual CO2 (million tonnes / annum)', 'Year']]
    .rename(columns={'Subnational unit (province, state)': 'State', 'Year': 'Plant Age'})
)
plants = plants.assign(**{
    'Capacity (MW)': plants['Capacity (MW)'].astype(float),
    # 'Plant Age' still holds the original 'Year' values here; convert them to
    # an age in years measured from 2021.
    'Plant Age': plants['Plant Age'].apply(lambda year: 2021 - float(year)),
})
# Collapse combined / suffixed state labels onto a single state name.
plants = plants.replace({
    'Madhya Pradesh / Uttar Pradesh': 'Madhya Pradesh',
    'Punjab state': 'Punjab',
    'Uttar Pradesh / Madhya Pradesh': 'Uttar Pradesh',
})
plants.head()

In [None]:
# Global Coal Mine Tracker (June 2021 file): keep operating Indian mines that
# have both coordinates, one row per 'Mine ID'.
mines = pd.read_csv("June 2021 Global Coal Mine Tracker.csv", encoding="ISO-8859-1")
mines = mines[mines['Country'] == 'India']
mines = mines[mines['Status'] == 'Operating']  # the original notes 'Proposed' as another status value
mines = mines[mines['Latitude'].notna()]
mines = mines[mines['Longitude'].notna()]
mines = mines.drop_duplicates('Mine ID')
mines = mines[['Mine Name', 'Status', 'Status Detail', 'State, Province', 'Coal Output (Annual, Mt)', 'Mine Type',
               'Latitude', 'Longitude', 'Opening Year']]
# Rows whose opening year is the placeholder 'TBD' cannot be converted to an age.
# Take an explicit copy so the column assignment below writes to an independent
# frame instead of a slice (avoids pandas' SettingWithCopyWarning).
mines = mines[mines['Opening Year'] != 'TBD'].copy()
# Age in years measured from 2021.
mines['Mine Age'] = mines['Opening Year'].apply(lambda opening_year: 2021 - float(opening_year))
mines = mines.drop(columns=['Opening Year'])
# Collapse combined / suffixed state labels onto a single state name.
mines = mines.replace(
    {'Madhya Pradesh / Uttar Pradesh': 'Madhya Pradesh',
     'Punjab state': 'Punjab',
     'Uttar Pradesh / Madhya Pradesh': 'Uttar Pradesh'
    }
)
mines.head()

# Missingness

States that appear in the GEM data (plants or mines) but are missing from the Pai data

In [None]:
# States named in either GEM table (plants or mines) that never occur in the
# district employment table.
(set(plants['State']) | set(mines['State, Province'])) - set(district_emp['State/UT Name'])

In [None]:
# States named in the GEM mines table that never occur in the Pai coal-mines table.
set(mines['State, Province']).difference(pai_coal_mines['State/UT Name'])

States that appear in the Pai data but are missing from the GEM data (plants and mines)

In [None]:
# States in the district employment table that occur in neither GEM table
# (plants nor mines).
set(district_emp['State/UT Name']).difference(plants['State'], mines['State, Province'])

In [None]:
# States in the Pai coal-mines table that never occur in the GEM mines table.
set(pai_coal_mines['State/UT Name']).difference(mines['State, Province'])

# Visualize Inputs

In [None]:
# Display the current working directory; the os.chdir calls in the following
# cells use paths relative to it.
os.getcwd()

In [None]:
# Move into the shapefile directory (relative to the current working directory)
# and read the transmission layer from there. The working directory stays
# changed afterwards; later cells navigate relative to it.
os.chdir('../../data/shapes')
transmission = gpd.read_file(filename='transmission.shp')

In [None]:
# Move to the sibling 'pickle' directory and load the pre-built geometries.
# NOTE(review): pickle.load can execute arbitrary code while unpickling; only
# load files produced by this project.
os.chdir('../pickle')
# Context managers close each handle even if unpickling raises.
with open("polygons.pickle", "rb") as file:
    polygons = pickle.load(file)
with open("states.pickle", "rb") as file:
    states = pickle.load(file)

In [None]:
# Write the figure into the plots directory (relative to the current working
# directory, which earlier cells left inside the pickle directory).
os.chdir('../../outputs/plots')

# Base layer: the loaded polygons, filled and outlined in red. The returned
# axes are reused for the layers drawn on top.
ax = geoplot.polyplot(
    gpd.GeoSeries(polygons),
    facecolor='red',
    edgecolor='red',
    figsize=(12, 12),
)

# State outlines in gray on the same axes.
geoplot.polyplot(
    gpd.GeoSeries(states),
    ax=ax,
    edgecolor='gray',
    linewidth=0.75,
)

# Transmission geometries drawn as a sankey layer on the same axes.
geoplot.sankey(
    gpd.GeoSeries(transmission['geometry']),
    ax=ax,
)

# Save through the parent figure; no file extension is given, so matplotlib's
# default output format applies.
fig = ax.get_figure()
fig.savefig('alternative_coal_incumbency_transmission')