# Geography of the Amazon IN 
Creating maps based on the status of delivery (delivered, cancelled and returned to seller)

In [1]:
# Dependencies and Setup
import pandas as pd
import requests
import time
import json
import folium
import io
import matplotlib.pyplot as plt
import branca

# Import API key
from api_keys import weather_api_key

# Import citipy to determine the cities based on latitude and longitude
from citipy import citipy

# For the overall map
from urllib.request import urlopen 

In [2]:
# Load the CSV file into a Pandas DataFrame
zipcode_df = pd.read_csv("../output/cleaned_Amazon Sale Report.csv")
zipcode_df.head()

Unnamed: 0,Order ID,Date,Status,Status (Successful),Fulfilment,ship-service-level,Style,SKU,Category,Size,ASIN,Qty,Amount,ship-city,ship-state,ship-postal-code,promotion-ids
0,405-8078784-5731545,2022-04-30,Cancelled,False,Merchant,Standard,SET389,SET389-KR-NP-S,Set,S,B09KXVBD7Z,0,647.62,MUMBAI,MAHARASHTRA,400081.0,
1,171-9198151-1101146,2022-04-30,Delivered,True,Merchant,Standard,JNE3781,JNE3781-KR-XXXL,Kurta,3XL,B09K3WFS32,1,406.0,BENGALURU,KARNATAKA,560085.0,Amazon PLCC Free-Financing Universal Merchant ...
2,403-9615377-8133951,2022-04-30,Cancelled,False,Merchant,Standard,J0341,J0341-DR-L,Western Dress,L,B099NRCT7B,0,753.33,PUDUCHERRY,PUDUCHERRY,605008.0,
3,406-7807733-3785945,2022-04-30,Delivered,True,Merchant,Standard,JNE3405,JNE3405-KR-S,Kurta,S,B081WX4G4Q,1,399.0,HYDERABAD,TELANGANA,500032.0,Amazon PLCC Free-Financing Universal Merchant ...
4,407-5443024-5233168,2022-04-30,Cancelled,False,Amazon,Expedited,SET200,SET200-KR-NP-A-XXXL,Set,3XL,B08L91ZZXN,0,,HYDERABAD,TELANGANA,500008.0,IN Core Free Shipping 2015/04/08 23-48-5-108


In [3]:
# Drop the unnecessary columns for this map
zipcode_df.drop(['Order ID', 'Date', 'Fulfilment', 'ship-service-level', 'Style', 'SKU', 'Category', 'Size', 'ASIN', 'promotion-ids'],axis=1, inplace=True)
zipcode_df.head()

Unnamed: 0,Status,Status (Successful),Qty,Amount,ship-city,ship-state,ship-postal-code
0,Cancelled,False,0,647.62,MUMBAI,MAHARASHTRA,400081.0
1,Delivered,True,1,406.0,BENGALURU,KARNATAKA,560085.0
2,Cancelled,False,0,753.33,PUDUCHERRY,PUDUCHERRY,605008.0
3,Delivered,True,1,399.0,HYDERABAD,TELANGANA,500032.0
4,Cancelled,False,0,,HYDERABAD,TELANGANA,500008.0


In [4]:
# Consolidate RTS to Cancelled
zipcode_df.loc[:, 'Status'] = zipcode_df['Status'].replace({
    "RTS (Return to Seller)": "Cancelled"
})

zipcode_df['Status'].value_counts()

Status
Delivered    29472
Cancelled    20339
Name: count, dtype: int64

---

## Cleaning up the State/Territory names in the report

In [5]:
# Check for typos or abbreviations of the State/UT names
zipcode_df['ship-state'].value_counts()

ship-state
MAHARASHTRA               8230
KARNATAKA                 6143
UTTAR PRADESH             4310
TELANGANA                 4263
TAMIL NADU                4225
KERALA                    3007
DELHI                     2614
WEST BENGAL               2251
ANDHRA PRADESH            2217
GUJARAT                   1761
HARYANA                   1613
RAJASTHAN                 1134
MADHYA PRADESH            1061
BIHAR                      892
ODISHA                     855
PUNJAB                     755
ASSAM                      703
UTTARAKHAND                666
JHARKHAND                  615
GOA                        419
CHHATTISGARH               385
HIMACHAL PRADESH           370
JAMMU & KASHMIR            308
CHANDIGARH                 133
MANIPUR                    131
PUDUCHERRY                 126
ANDAMAN & NICOBAR          114
MEGHALAYA                   90
SIKKIM                      82
NAGALAND                    77
ARUNACHAL PRADESH           70
TRIPURA                     

In [6]:
# Rename/unify the State/UT
zipcode_df.loc[:, 'ship-state'] = zipcode_df['ship-state'].replace({
    "RAJSHTHAN": "RAJASTHAN", #Typo
    "PUNJAB/MOHALI/ZIRAKPUR": "PUNJAB", #Commonly consolidated to the first city
    "RJ": "RAJASTHAN", #Abbreviation for Rajasthan
    "ORISSA": "ODISHA", #Odisha was formerly named Orissa
    "LADAKH": "LEH", #On openweather website
    "ANDAMAN & NICOBAR": "ANDAMAN", #On openweather website
    "DADRA AND NAGAR": "DADRA", #On openweather website
    "LAKSHADWEEP": "KAVARATTI", #On openweather website
    "TELANGANA": "HYDERABAD", #On openweather website
})
zipcode_df['ship-state'].value_counts()

ship-state
MAHARASHTRA          8230
KARNATAKA            6143
UTTAR PRADESH        4310
HYDERABAD            4263
TAMIL NADU           4225
KERALA               3007
DELHI                2614
WEST BENGAL          2251
ANDHRA PRADESH       2217
GUJARAT              1761
HARYANA              1613
RAJASTHAN            1136
MADHYA PRADESH       1061
BIHAR                 892
ODISHA                857
PUNJAB                756
ASSAM                 703
UTTARAKHAND           666
JHARKHAND             615
GOA                   419
CHHATTISGARH          385
HIMACHAL PRADESH      370
JAMMU & KASHMIR       308
CHANDIGARH            133
MANIPUR               131
PUDUCHERRY            126
ANDAMAN               114
MEGHALAYA              90
SIKKIM                 82
NAGALAND               77
ARUNACHAL PRADESH      70
TRIPURA                61
MIZORAM                33
DADRA                  29
NEW DELHI              28
LEH                    14
KAVARATTI               3
APO                     1
N

In [7]:
listofstates = zipcode_df['ship-state'].unique()
listofstates

array(['MAHARASHTRA', 'KARNATAKA', 'PUDUCHERRY', 'HYDERABAD',
       'ANDHRA PRADESH', 'HARYANA', 'JHARKHAND', 'CHHATTISGARH', 'ASSAM',
       'ODISHA', 'UTTAR PRADESH', 'GUJARAT', 'TAMIL NADU', 'UTTARAKHAND',
       'WEST BENGAL', 'RAJASTHAN', 'DELHI', 'MADHYA PRADESH', 'KERALA',
       'JAMMU & KASHMIR', 'BIHAR', 'MEGHALAYA', 'PUNJAB', 'GOA', 'DADRA',
       'HIMACHAL PRADESH', 'TRIPURA', 'CHANDIGARH', 'SIKKIM', 'ANDAMAN',
       'MANIPUR', 'MIZORAM', 'NAGALAND', 'NEW DELHI', 'ARUNACHAL PRADESH',
       'LEH', nan, 'KAVARATTI', 'APO'], dtype=object)

### Notice the NaN values, clean up those rows

In [8]:
# Check what the nan values are
nan_rows = zipcode_df[zipcode_df['ship-state'].isna()]
nan_rows

Unnamed: 0,Status,Status (Successful),Qty,Amount,ship-city,ship-state,ship-postal-code
5839,Cancelled,False,0,380.0,,,
7354,Delivered,True,1,376.0,,,
10250,Cancelled,False,1,654.0,,,
11140,Cancelled,False,1,399.0,,,
13198,Delivered,True,1,1299.0,,,
16143,Cancelled,False,0,380.0,,,
16144,Cancelled,False,0,380.0,,,
23790,Delivered,True,1,376.0,,,
30199,Delivered,True,1,699.0,,,
30317,Cancelled,False,0,,,,


In [9]:
# Drop the NaN rows in ship-state since it is not possible to pull a geographical information from this
cleaned_zip = zipcode_df.dropna(subset=['ship-state'])

# Check if the Nan rows are now dropped
nan_rows = cleaned_zip[cleaned_zip['ship-state'].isna()]

cleaned_zip

Unnamed: 0,Status,Status (Successful),Qty,Amount,ship-city,ship-state,ship-postal-code
0,Cancelled,False,0,647.62,MUMBAI,MAHARASHTRA,400081.0
1,Delivered,True,1,406.00,BENGALURU,KARNATAKA,560085.0
2,Cancelled,False,0,753.33,PUDUCHERRY,PUDUCHERRY,605008.0
3,Delivered,True,1,399.00,HYDERABAD,HYDERABAD,500032.0
4,Cancelled,False,0,,HYDERABAD,HYDERABAD,500008.0
...,...,...,...,...,...,...,...
49806,Cancelled,False,1,771.00,JUNAGADH,GUJARAT,362001.0
49807,Cancelled,False,1,665.00,MUMBAI,MAHARASHTRA,400056.0
49808,Cancelled,False,1,574.00,PRAYAGRAJ (ALLAHABAD),UTTAR PRADESH,211007.0
49809,Cancelled,False,0,,KOLKATA,WEST BENGAL,700040.0


In [10]:
ready_for_latlng = cleaned_zip['ship-state'].unique()
ready_for_latlng

array(['MAHARASHTRA', 'KARNATAKA', 'PUDUCHERRY', 'HYDERABAD',
       'ANDHRA PRADESH', 'HARYANA', 'JHARKHAND', 'CHHATTISGARH', 'ASSAM',
       'ODISHA', 'UTTAR PRADESH', 'GUJARAT', 'TAMIL NADU', 'UTTARAKHAND',
       'WEST BENGAL', 'RAJASTHAN', 'DELHI', 'MADHYA PRADESH', 'KERALA',
       'JAMMU & KASHMIR', 'BIHAR', 'MEGHALAYA', 'PUNJAB', 'GOA', 'DADRA',
       'HIMACHAL PRADESH', 'TRIPURA', 'CHANDIGARH', 'SIKKIM', 'ANDAMAN',
       'MANIPUR', 'MIZORAM', 'NAGALAND', 'NEW DELHI', 'ARUNACHAL PRADESH',
       'LEH', 'KAVARATTI', 'APO'], dtype=object)

---

## Pull in the API to get the coordinates of the State/Territories<br>
Pull the coordinate information from openweathermap and make<br>
3 DataFrames based on the status and create different maps

In [11]:

# API base URL
url = "http://api.openweathermap.org/data/2.5/weather?"
units = "metric"
query_url = f"{url}appid={weather_api_key}&units={units}&q="

# Define an empty list to fetch the weather data for each city
city_data = []

# Print to logger
print("Beginning Data Retrieval     ")
print("-----------------------------")


# Create counters
record_count = 1
set_count = 1

# Loop through all the cities in our list to fetch weather data
for i, city in enumerate(ready_for_latlng):

    # Group cities in sets of 50 for logging purposes
    if (i % 50 == 0 and i >= 50):
        set_count += 1
        record_count = 0

    # Create endpoint URL with each city
    city_url = query_url + city

    # Log the url, record, and set numbers
    print("Processing Record %s of Set %s | %s" % (record_count, set_count, city))

    # Add 1 to the record count
    record_count += 1

    # Run an API request for each of the cities
    try:
        # Parse the JSON and retrieve data
        city_weather = requests.get(city_url).json()
        # print(city_weather)

        # Parse out latitude, longitude, max temp, humidity, cloudiness, wind speed, country, and date
        city_lat = city_weather["coord"]["lat"]
        city_lng = city_weather["coord"]["lon"]

        # Append the City information into city_data list
        city_data.append({"City": city,
                          "Lat": city_lat,
                          "Lng": city_lng})

    # If an error is experienced, skip the city
    except:
        print("City not found. Skipping...")
        pass

    # pause to avoid rate limiting
    time.sleep(1)

# Indicate that Data Loading is complete
print("-----------------------------")
print("Data Retrieval Complete      ")
print("-----------------------------")


Beginning Data Retrieval     
-----------------------------
Processing Record 1 of Set 1 | MAHARASHTRA
Processing Record 2 of Set 1 | KARNATAKA
Processing Record 3 of Set 1 | PUDUCHERRY
Processing Record 4 of Set 1 | HYDERABAD
Processing Record 5 of Set 1 | ANDHRA PRADESH
Processing Record 6 of Set 1 | HARYANA
Processing Record 7 of Set 1 | JHARKHAND
Processing Record 8 of Set 1 | CHHATTISGARH
Processing Record 9 of Set 1 | ASSAM
Processing Record 10 of Set 1 | ODISHA
Processing Record 11 of Set 1 | UTTAR PRADESH
Processing Record 12 of Set 1 | GUJARAT
Processing Record 13 of Set 1 | TAMIL NADU
Processing Record 14 of Set 1 | UTTARAKHAND
Processing Record 15 of Set 1 | WEST BENGAL
Processing Record 16 of Set 1 | RAJASTHAN
Processing Record 17 of Set 1 | DELHI
Processing Record 18 of Set 1 | MADHYA PRADESH
Processing Record 19 of Set 1 | KERALA
Processing Record 20 of Set 1 | JAMMU & KASHMIR
Processing Record 21 of Set 1 | BIHAR
Processing Record 22 of Set 1 | MEGHALAYA
Processing Recor

In [12]:
city_data_df = pd.DataFrame(city_data)
city_data_df.rename(columns={'City': 'ship-state'}, inplace=True)
city_data_df

Unnamed: 0,ship-state,Lat,Lng
0,MAHARASHTRA,19.5,75.0
1,KARNATAKA,13.5,76.0
2,PUDUCHERRY,11.9333,79.8167
3,HYDERABAD,17.3753,78.4744
4,ANDHRA PRADESH,16.0,79.0
5,HARYANA,29.0,76.0
6,JHARKHAND,23.75,85.5
7,CHHATTISGARH,21.5,82.0
8,ASSAM,26.0,93.0
9,ODISHA,20.5,84.4167


In [13]:
# Export the City_Data into a csv
city_data_df.to_csv("../output/lat_lng.csv", index=False)

---

## Create each status' report including the coordinates

### Cancelled counts

In [14]:
#  Cancelled Status
cxld_sts = cleaned_zip.loc[cleaned_zip['Status'] == 'Cancelled']
cxld_grouped = cxld_sts.groupby('ship-state').size().reset_index(name='count')
cxld_coords = pd.merge(city_data_df, cxld_grouped, on='ship-state', how='left')

# Output to csv
cxld_sts.to_csv("../output/Coords_Cancelled.csv", index=False)

cxld_coords.head()

Unnamed: 0,ship-state,Lat,Lng,count
0,MAHARASHTRA,19.5,75.0,3217
1,KARNATAKA,13.5,76.0,2443
2,PUDUCHERRY,11.9333,79.8167,55
3,HYDERABAD,17.3753,78.4744,1793
4,ANDHRA PRADESH,16.0,79.0,985


### Delivered counts

In [15]:
delivered_sts = cleaned_zip.loc[cleaned_zip['Status'] == 'Delivered']
delivered_grouped = delivered_sts.groupby('ship-state').size().reset_index(name='count')
delivered_coords = pd.merge(city_data_df, delivered_grouped, on='ship-state', how='left')

# Output to csv
delivered_sts.to_csv("../output/Coords_Delivered.csv", index=False)

### Overall counts

In [16]:
# overall_grouped = cleaned_zip.groupby(['Status', 'ship-state']).size().reset_index(name='count')
# overall_coords = pd.merge(city_data_df, overall_grouped, on='ship-state', how='left')
# overall_coords

# # Output to csv
# overall_coords.to_csv("../output/Coords_Overall.csv", index=False)

In [17]:
horizontal2 = pd.merge(cxld_coords, delivered_coords, on='ship-state', how='left')
horizontal3 = horizontal2.rename(columns={
    'Lat_x': 'Lat',
    'Lng_x': 'Lng',
    'count_x': 'Cxld',
    'count_y': 'Delivered'
    })
horizontal4 = horizontal3.drop(columns=['Lat_y', 'Lng_y'])
horizontal5 = horizontal4.dropna(subset=['Delivered'])
horizontal5


Unnamed: 0,ship-state,Lat,Lng,Cxld,Delivered
0,MAHARASHTRA,19.5,75.0,3217,5013.0
1,KARNATAKA,13.5,76.0,2443,3700.0
2,PUDUCHERRY,11.9333,79.8167,55,71.0
3,HYDERABAD,17.3753,78.4744,1793,2470.0
4,ANDHRA PRADESH,16.0,79.0,985,1232.0
5,HARYANA,29.0,76.0,619,994.0
6,JHARKHAND,23.75,85.5,276,339.0
7,CHHATTISGARH,21.5,82.0,142,243.0
8,ASSAM,26.0,93.0,315,388.0
9,ODISHA,20.5,84.4167,400,457.0


In [18]:
horizontal5['Delivered'] = horizontal5['Delivered'].astype(int)
print(horizontal5)

           ship-state      Lat       Lng  Cxld  Delivered
0         MAHARASHTRA  19.5000   75.0000  3217       5013
1           KARNATAKA  13.5000   76.0000  2443       3700
2          PUDUCHERRY  11.9333   79.8167    55         71
3           HYDERABAD  17.3753   78.4744  1793       2470
4      ANDHRA PRADESH  16.0000   79.0000   985       1232
5             HARYANA  29.0000   76.0000   619        994
6           JHARKHAND  23.7500   85.5000   276        339
7        CHHATTISGARH  21.5000   82.0000   142        243
8               ASSAM  26.0000   93.0000   315        388
9              ODISHA  20.5000   84.4167   400        457
10      UTTAR PRADESH  27.2500   80.7500  1837       2473
11            GUJARAT  23.0000   72.0000   636       1125
12         TAMIL NADU  11.0000   78.0000  1744       2481
13        UTTARAKHAND  30.2500   79.2500   272        394
14        WEST BENGAL  24.0000   88.0000  1006       1245
15          RAJASTHAN  26.0000   74.0000   430        706
16            

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  horizontal5['Delivered'] = horizontal5['Delivered'].astype(int)


In [19]:
horizontal5['Coordinates'] = horizontal5[['Lat', 'Lng']].apply(tuple, axis=1)
horizontal5.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  horizontal5['Coordinates'] = horizontal5[['Lat', 'Lng']].apply(tuple, axis=1)


Unnamed: 0,ship-state,Lat,Lng,Cxld,Delivered,Coordinates
0,MAHARASHTRA,19.5,75.0,3217,5013,"(19.5, 75.0)"
1,KARNATAKA,13.5,76.0,2443,3700,"(13.5, 76.0)"
2,PUDUCHERRY,11.9333,79.8167,55,71,"(11.9333, 79.8167)"
3,HYDERABAD,17.3753,78.4744,1793,2470,"(17.3753, 78.4744)"
4,ANDHRA PRADESH,16.0,79.0,985,1232,"(16.0, 79.0)"


---
## Generating the Cancelled and Delivered maps
### Cancelled counts

In [20]:
c = folium.Map(location=[21,83], tiles="OpenStreetMap", zoom_start=4.4)
for i in range(0,len(cxld_coords)):
   folium.CircleMarker(
      location=[cxld_coords.iloc[i]['Lat'], cxld_coords.iloc[i]['Lng']],
      tooltip=['State: %s' % cxld_coords.iloc[i]['ship-state'], 'Count: %s' % cxld_coords.iloc[i]['count']],
      radius=float(cxld_coords.iloc[i]['count'])/50,
      color='#ff0371',
      fill=True,
      fill_color='#ff0371'
   ).add_to(c)
c


In [21]:
# Save the map as a HTML file
c.save('../output/bubblemap_cxld.html')

### Delivered counts

In [22]:
d = folium.Map(location=[21,83], tiles="OpenStreetMap", zoom_start=4.4)
for i in range(0,len(delivered_coords)):
   folium.CircleMarker(
      location=[delivered_coords.iloc[i]['Lat'], delivered_coords.iloc[i]['Lng']],
      tooltip=['State: %s' % delivered_coords.iloc[i]['ship-state'], 'Count: %s' % delivered_coords.iloc[i]['count']],
      radius=float(delivered_coords.iloc[i]['count'])/50,
      color='#45c585',
      fill=True,
      fill_color='#45c585'
   ).add_to(d)
d


In [23]:
# Save the map as a HTML file
d.save('../output/bubblemap_delivered.html')

---

## Overall Order Status Map

### Pie Charts

In [24]:
pie_charts_data = zip(horizontal5.Cxld, horizontal5.Delivered)

fig = plt.figure(figsize=(0.5, 0.5))
fig.patch.set_alpha(0)
ax = fig.add_subplot(111)
plots = []
for sizes in pie_charts_data:
    ax.pie(sizes, colors=("#ff0371", "#45c585"))
    buff = io.StringIO()
    plt.savefig(buff, format="SVG")
    buff.seek(0)
    svg = buff.read()
    svg = svg.replace("\n", "")
    plots.append(svg)
    plt.cla()
plt.clf()
plt.close()

### Legend

In [25]:
legend_html = """
{% macro html(this, kwargs) %}
<div style="
    position: fixed;
    bottom: 50px;
    left: 50px;
    width: 250px;
    height: 80px;
    z-index:9999;
    font-size:14px;
    ">
    <p><a style="color:#ff0371;font-size:150%;margin-left:20px;">◼</a>&emsp;Cancelled</p>
    <p><a style="color:#45c585;font-size:150%;margin-left:20px;">◼</a>&emsp;Delivered</p>
</div>
<div style="
    position: fixed;
    bottom: 50px;
    left: 50px;
    width: 150px;
    height: 80px;
    z-index:9998;
    font-size:14px;
    background-color: #ffffff;
    filter: blur(8px);
    -webkit-filter: blur(8px);
    opacity: 0.7;
    ">
</div>
{% endmacro %}
"""

legend = branca.element.MacroElement()
legend._template = branca.element.Template(legend_html)

### Map

In [26]:
m = folium.Map(location=(21, 83), tiles="OpenStreetMap", zoom_start=4.5)

for i, coord in enumerate(horizontal5.Coordinates):
    marker = folium.Marker(coord)
    icon = folium.DivIcon(html=plots[i])
    marker.add_child(icon)
    popup = folium.Popup(
        "Cancelled: {}<br>\nDelivered: {}".format(horizontal5.Cxld[i], horizontal5.Delivered[i])
    )
    marker.add_child(popup)
    m.add_child(marker)
m.get_root().add_child(legend)

m

In [27]:
m.save('../output/bubblemap_overall.html')