In [1]:
import pandas as pd
# Let long text cells render in full (up to 1000 characters) instead of being truncated.
pd.options.display.max_colwidth = 1000

In [None]:
from src.utils import get_city_lat_long
import os

# Load the dataset from <parent of the working directory>/artifacts/population.csv
data_path = os.path.join(os.path.abspath(os.path.join(os.getcwd(), os.pardir)), "artifacts", "population.csv")
df = pd.read_csv(data_path)

# Keep only the rows whose City value mentions 'city'.
# - na=False treats a missing city name as "no match" instead of producing a
#   NaN mask that boolean indexing rejects.
# - .copy() detaches the result from the raw frame so the column assignments
#   below never trigger SettingWithCopyWarning.
df = df.loc[df['City'].str.contains('city', case=False, na=False), :].copy()

# Normalise the place names.
# 1. Drop the ' (balance)' marker. The parentheses are escaped and regex=True is
#    explicit, so the literal text is matched on every pandas version (an
#    unescaped '(balance)' is a regex group and would leave the parentheses behind).
# 2. Drop only the *trailing* ' city' designation. Removing every occurrence
#    would turn 'Oklahoma City city' into 'Oklahoma' rather than 'Oklahoma City'.
# NOTE(review): a name that ends in 'City' with no designation after it is still
# shortened, as before - confirm the source file always carries the suffix.
city_names = df['City'].str.replace(r' \(balance\)', '', case=False, regex=True)
city_names = city_names.str.replace(r'\s+city\s*$', '', case=False, regex=True)
df['City'] = city_names

# Look up the coordinates of every (city, state) pair in a single pass and
# attach them as two new columns (get_city_lat_long returns a (lat, long) pair).
coordinates = [get_city_lat_long(city, state) for city, state in zip(df['City'], df['State'])]
df['Latitude'] = [latitude for latitude, _ in coordinates]
df['Longitude'] = [longitude for _, longitude in coordinates]

df.head()

Unnamed: 0,City,State,Population,Latitude,Longitude
0,New York,New York,8258035,40.712775,-74.005973
1,Los Angeles,California,3820914,34.054908,-118.242643
2,Chicago,Illinois,2664452,41.878114,-87.629798
3,Houston,Texas,2314157,29.760077,-95.370111
4,Phoenix,Arizona,1650070,33.448377,-112.074037


In [None]:
# Estimated yearly small-IT-waste volume per city, in tons.
# Assumption: ~1 kg of small IT waste per capita per year.
KG_PER_CAPITA_PER_YEAR = 1.00
KG_PER_TON = 1000

df['volume (ton)'] = (df['Population'] * KG_PER_CAPITA_PER_YEAR) / KG_PER_TON
df.head()

Unnamed: 0,City,State,Population,Latitude,Longitude,volume (ton)
0,New York,New York,8258035,40.712775,-74.005973,8258.035
1,Los Angeles,California,3820914,34.054908,-118.242643,3820.914
2,Chicago,Illinois,2664452,41.878114,-87.629798,2664.452
3,Houston,Texas,2314157,29.760077,-95.370111,2314.157
4,Phoenix,Arizona,1650070,33.448377,-112.074037,1650.07


In [None]:
# Remove the Population column from the working frame.
# Reassigning (rather than inplace=True) together with errors="ignore" keeps
# this cell re-runnable: a second execution is a no-op instead of a KeyError.
df = df.drop(columns=["Population"], errors="ignore")
df.head()

Unnamed: 0,City,State,Latitude,Longitude,volume (ton)
0,New York,New York,40.712775,-74.005973,8258.035
1,Los Angeles,California,34.054908,-118.242643,3820.914
2,Chicago,Illinois,41.878114,-87.629798,2664.452
3,Houston,Texas,29.760077,-95.370111,2314.157
4,Phoenix,Arizona,33.448377,-112.074037,1650.07


In [None]:
import numpy as np
# Give every row a sequential identifier: W1, W2, ..., W<number of rows>.
row_total = df.shape[0]
sequence_labels = np.arange(1, row_total + 1).astype(str)

code = []
for label in sequence_labels:
    code.append("W" + label)

df['Code'] = code
df.head()

Unnamed: 0,City,State,Latitude,Longitude,volume (ton),Code
0,New York,New York,40.712775,-74.005973,8258.035,W1
1,Los Angeles,California,34.054908,-118.242643,3820.914,W2
2,Chicago,Illinois,41.878114,-87.629798,2664.452,W3
3,Houston,Texas,29.760077,-95.370111,2314.157,W4
4,Phoenix,Arizona,33.448377,-112.074037,1650.07,W5


In [None]:
from src.utils import add_sheet_to_excelbook

# From here on data_path points at the workbook
# <parent of the working directory>/artifacts/input_data.xlsx (it is rebound).
parent_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
data_path = os.path.join(parent_dir, "artifacts", "input_data.xlsx")

# Two column subsets of the working frame: volumes per code, and location per code.
waste_volume = df[['Code', 'volume (ton)']]
waste_source = df[['Code', 'City', 'State', 'Latitude', 'Longitude']]

# Hand each subset to add_sheet_to_excelbook under its sheet name,
# volume first and source second.
for sheet_name, sheet_frame in (
    ("Waste Volume", waste_volume),
    ("Waste Source", waste_source),
):
    add_sheet_to_excelbook(data_path, sheet_name, sheet_frame)

In [3]:
def _coordinates_by_code(sites):
    """Return a dict mapping each row's 'Code' to its ('Latitude', 'Longitude') tuple.

    If a code appears on several rows, the last row wins.
    """
    return {row['Code']: (row['Latitude'], row['Longitude']) for index, row in sites.iterrows()}


# For every stage of the chain: read its sheet from the workbook at data_path,
# then build a {code: (latitude, longitude)} lookup from it.
waste_source_data = pd.read_excel(data_path, sheet_name='Waste Source')
waste_source = _coordinates_by_code(waste_source_data)

shredder_data = pd.read_excel(data_path, sheet_name='E-waste Collector')
shredders = _coordinates_by_code(shredder_data)

oxide_producer_data = pd.read_excel(data_path, sheet_name='Oxide Producer')
oxide_producers = _coordinates_by_code(oxide_producer_data)

# NOTE(review): the sheet name is spelled 'Flouride' - presumably it matches the
# workbook as-is; verify before "correcting" it.
fluoride_producer_data = pd.read_excel(data_path, sheet_name='Flouride Producer')
fluoride_producers = _coordinates_by_code(fluoride_producer_data)

metal_producer_data = pd.read_excel(data_path, sheet_name='Metal Producer')
metal_producers = _coordinates_by_code(metal_producer_data)

magnet_producer_data = pd.read_excel(data_path, sheet_name='Magnet Producer')
magnet_producers = _coordinates_by_code(magnet_producer_data)

In [None]:
from src.utils import calculate_transportation_cost
import time

# Pause applied after every cost calculation.
# NOTE(review): presumably this throttles calls to an external service - confirm.
PAUSE_SECONDS = 5.5


def _cost_then_pause(origins, destinations):
    """Run calculate_transportation_cost for one leg, wait PAUSE_SECONDS, return the result."""
    leg_cost = calculate_transportation_cost(origins, destinations)
    time.sleep(PAUSE_SECONDS)
    return leg_cost


# One calculation per consecutive pair of stages, in chain order.
waste_to_shredder_tc = _cost_then_pause(waste_source, shredders)
shredder_to_oxide_tc = _cost_then_pause(shredders, oxide_producers)
oxide_to_fluoride_tc = _cost_then_pause(oxide_producers, fluoride_producers)
fluoride_to_metal_tc = _cost_then_pause(fluoride_producers, metal_producers)
metal_to_magnet_tc = _cost_then_pause(metal_producers, magnet_producers)



In [10]:
from src.utils import add_sheet_to_excelbook
# Pass each transportation-cost result to add_sheet_to_excelbook under its
# sheet name, in the order listed below.
tc_sheets = [
    ("Shredder-Oxide TC", shredder_to_oxide_tc),
    ("Oxide-Fluoride TC", oxide_to_fluoride_tc),
    ("Fluoride-Metal TC", fluoride_to_metal_tc),
    ("Metal-Magnet TC", metal_to_magnet_tc),
    ("Waste-Shredder TC", waste_to_shredder_tc),
]
for tc_sheet_name, tc_table in tc_sheets:
    add_sheet_to_excelbook(data_path, tc_sheet_name, tc_table)