###  Imports:

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import contextily
import geopandas as gpd
from shapely.geometry import Point, Polygon
from geopy.geocoders import Nominatim
from tqdm import tqdm 
from decimal import *
import seaborn as sns


### Get existing schema from EC header

In [2]:
# Use only 1st row, header = None, so that we can use the header
#ec = pd.read_csv('Earth Challenge.csv',nrows=1,header = None)
#ecAr = pd.array(ec[1])
#print((ecAr[0]))

ec = pd.read_csv('Earth Challenge.csv',low_memory=False)
header = pd.array(ec.columns)
print(header)

<PandasArray>
[                              'X',                               'Y',
                        'OBJECTID',                'RecordSequenceID',
                        'UniqueID',                        'SourceID',
                  'LocationFreqID',                        'Location',
                         'Dataset',                    'Organization',
                           'Other',          'CountryName_FromSource',
        'SubCountry_L1_FromSource',        'SubCountry_L2_FromSource',
                      'Longitude1',                       'Latitude1',
                      'Longitude2',                       'Latitude2',
                    'TotalWidth_m',                   'TotalLength_m',
                  'TotalArea_Sq_m',                   'ShorelineName',
                  'WaterfrontName',              'BeachAreaLandcover',
                       'BeachType',                       'EventType',
                 'TotalVolunteers',                    'DateOri

### Open Literatti database

In [3]:
litter = pd.read_csv('USA OpenData Report Litterati.csv')
print(litter[['Location']])

                              Location
0        (-122.134176521,37.401654278)
1        (-122.134317937,37.401561979)
2        (-122.134848444,37.401673876)
3        (-122.134343237,37.399733858)
4        (-122.134468295,37.399795591)
...                                ...
1877600  (-122.268333435,37.867202758)
1877601  (-118.467994689,33.995864868)
1877602              (-97.4275,32.694)
1877603           (-97.42834,32.69316)
1877604      (-110.9632514,32.2797253)

[1877605 rows x 1 columns]


### ^^ Above we see that Latitude and Longitude are grouped together

## Functions getX(str) and getY(str)
### The functions take a string of the sort str = ('42.1889634352895/-71.8448098585561')
### getX gets rid of the parenthesis '()' and takes numbers until it reaches a comma ','
### returns: a string reprisenting Latitude 


In [4]:
def getX(str):
    
    x = ''
    for c in str:
        if (c == '('):
            continue
        elif (c == ','):
            break
        else:
            x += c
    return (x)

s = "(-97.4279,32.696)"
x = getX(s)
print(x)
print(type(x))


-97.4279
<class 'str'>


### getY gets rid of the parenthesis '()' and takes the numbers after it reaches a comma ','
### returns: a string reprisenting Longitude 

In [5]:
def getY(str):
    
    y = ''
    start = False
    for c in str:
        if(c == ','):
            start = True
            continue
        elif(start == True):
            if ((c != '(') & (c != ')')):
                y += c
                
            
    return (y)
st = "(-122.134176521,37.401654278)"
y = getY(st)
print(y)
print(type(y))


37.401654278
<class 'str'>


In [271]:
def getStr(str):
    x = ''
    for c in str:
        if ((c == '(') | (c == ')')):
            continue
        else:
            if(c == ','):
                x += c
                x += ' '
                continue
            x += c
    return x

s = "(-93.9228440915178,29.9566605873685)"
x = getStr(s)
print(x)
print(type(x))


-93.9228440915178, 29.9566605873685
<class 'str'>


In [6]:
# Make an array containing the location column
loc2020 = pd.array(litter['Location'])
tags = pd.array(litter['Tags'])
date = pd.array(litter['Date'])

latitude = []
longitude = []
counter = 0

for i in loc2020:
    
    latitude.append(getX(i))
    longitude.append(getY(i))

    counter += 1
    
    # Example Dataframe with coordinates, dates, tags
    # header[0] = 'X', header[1] = 'Y' 
    
dict = {"Date": date,"Tags": tags ,"X" : latitude, "Y" : longitude}
df = pd.DataFrame(dict)
df


Unnamed: 0,Date,Tags,X,Y
0,2561-01-30 00:39:52+05,"{bottlecap,plastic}",-122.134176521,37.401654278
1,2561-01-30 00:39:30+05,"{fork,plastic}",-122.134317937,37.401561979
2,2561-01-30 00:38:06+05,"{plastic,wrapper}",-122.134848444,37.401673876
3,2561-01-24 23:47:00+05,{plastic},-122.134343237,37.399733858
4,2561-01-24 23:46:17+05,{tinfoil},-122.134468295,37.399795591
...,...,...,...,...
1877600,2012-07-02 04:15:40+05,{NULL},-122.268333435,37.867202758
1877601,2012-06-17 21:05:13+05,"{california,instagram,instapic,litter,macdonal...",-118.467994689,33.995864868
1877602,2012-02-08 20:21:35+05,{NULL},-97.4275,32.694
1877603,2012-01-20 07:31:49+05,{NULL},-97.42834,32.69316


### It looks like a lot of the coordinates are pretty close together.
### Since in the EC dataset, each tuple is a cleanup EVENT, we can 
### group up litter by location ? How? ...I'm still figuring that out.


In [7]:
strLoc = []

for i in range(len(loc2020)):
    
    #strLoc.append(getStr(loc2020[i]))
    #print(getStr(loc2020[i]))
    strng = ("%s, %s" %(getY(loc2020[i]), getX(loc2020[i])))
    strLoc.append(strng)
    

df["Loc_str"] = strLoc
df

Unnamed: 0,Date,Tags,X,Y,Loc_str
0,2561-01-30 00:39:52+05,"{bottlecap,plastic}",-122.134176521,37.401654278,"37.401654278, -122.134176521"
1,2561-01-30 00:39:30+05,"{fork,plastic}",-122.134317937,37.401561979,"37.401561979, -122.134317937"
2,2561-01-30 00:38:06+05,"{plastic,wrapper}",-122.134848444,37.401673876,"37.401673876, -122.134848444"
3,2561-01-24 23:47:00+05,{plastic},-122.134343237,37.399733858,"37.399733858, -122.134343237"
4,2561-01-24 23:46:17+05,{tinfoil},-122.134468295,37.399795591,"37.399795591, -122.134468295"
...,...,...,...,...,...
1877600,2012-07-02 04:15:40+05,{NULL},-122.268333435,37.867202758,"37.867202758, -122.268333435"
1877601,2012-06-17 21:05:13+05,"{california,instagram,instapic,litter,macdonal...",-118.467994689,33.995864868,"33.995864868, -118.467994689"
1877602,2012-02-08 20:21:35+05,{NULL},-97.4275,32.694,"32.694, -97.4275"
1877603,2012-01-20 07:31:49+05,{NULL},-97.42834,32.69316,"32.69316, -97.42834"


In [75]:
class Mod3ctr:
    
    def __init__(self, clk):
        self.clk = clk
    def tick(self):
    
        if(self.clk == 10):
            self.clk = 0
        else:
            self.clk += 1


In [77]:
c = Mod3ctr(1)
print(c.clk)

c.tick()
print(c.clk)
c.tick()
print(c.clk)

c.tick()
print(c.clk)
c.tick()
print(c.clk)
c.tick()
print(c.clk)

c.tick()
print(c.clk)

c.tick()
print(c.clk)
c.tick()
print(c.clk)

c.tick()
print(c.clk)
c.tick()
print(c.clk)
c.tick()
print(c.clk)

c.tick()
print(c.clk)

1
2
3
4
5
6
7
8
9
10
0
1
2


In [74]:
geolocator = Nominatim(user_agent = "GMU")
addressLine = []

n = 0
pct = 0
for i in range(60200,60300):
    
    l = geolocator.reverse(strLoc[i])
    addressLine.append(l)

    #l.raw("location")
    print(str(i)+ ": " + str(l))
    

60200: 7, Imperial Drive, Botany Woods, Willington Green, Wade Hampton, Greenville County, South Carolina, 29615, United States of America
60201: 3600, Horizon Boulevard, Trevose, Bensalem Township, Bucks County, Pennsylvania, 19053, United States of America
60202: Walmart Supercenter, 250, Tallmadge Road, Brimfield Station, Brimfield, Brimfield Township, Portage County, Ohio, 44240, United States of America
60203: Southeast 2nd Street, Evansville, Vanderburgh County, Indiana, 47713, United States of America
60204: Dunkin' Donuts, James Street, Florham Park, Morris County, New Jersey, 07932, United States of America
60205: 1201, Pine Street, Mooresville, Iredell County, North Carolina, 28115, United States of America
60206: Pine Street, Mooresville, Iredell County, North Carolina, 28115, United States of America
60207: Southeast 2nd Street, Evansville, Vanderburgh County, Indiana, 47713, United States of America
60208: 13, Imperial Drive, Botany Woods, Willington Green, Wade Hampton, G

### From google earth 0.001 is approximately 90 meters on the ground
### Figure out how to "package" tuples of litter according to how far appart they are


### If the entry1 is within ~ 100km range of the previous entry
### count it towards the same event
### Note that there are multiple events zig-zagging 

In [86]:


indexes1 = []
indexes2 = []
indexes3 = []
indexes4 = []
indexes5 = []
indexes6 = []
indexes7 = []
indexes8 = []
indexes9 = []
indexes10 = []
indexes11 = []

prevx1 = "-82.862800"
prevy1 = "135.000000"

prevx2 = "-84.862800"
prevy2 = "135.000000"

prevx3 = "-88.862800"
prevy3 = "131.000000"

prevx4 = "-88.862800"
prevy4 = "127.000000"

prevx5 = "-88.862800"
prevy5 = "123.000000"

prevx6 = "-88.862800"
prevy6 = "119.000000"

prevx7 = "-88.862800"
prevy7 = "115.000000"

prevx8 = "-88.862800"
prevy8 = "111.000000"

prevx9 = "-88.862800"
prevy9 = "107.000000"

prevx10 = "-88.862800"
prevy10 = "103.000000"

prevx11 = "-88.862800"
prevy11 = "99.000000"

c = Mod3ctr(1)
for index, row in litter.iterrows():

    
    currx = getX(row['Location'])
    curry = getY(row['Location'])

    # if the current coordinates are close, keep going
    if( ( abs( float(prevx1) - float(currx) ) <= 1.0 ) and ( abs( float(prevy1) - float(curry) ) <= 1.0 ) ):
        
        indexes1.append(index)
        prevx1 = currx
        prevy1 = curry
        
    #if 2nd set of coordinates is found, initiate second pair, to take care of zig-zagging values    
    elif(  (abs( float(prevx2)-float(currx) ) <= 1.0) and ( abs( float(prevy2)-float(curry) )  <= 1.0)  ):
        
        indexes2.append(index)
        prevx2 = currx
        prevy2 = curry
        
    #if a 3rd set of coordinates is introduced, initiate 3rd pair  
    elif(  (abs( float(prevx3)-float(currx) ) <= 1.0) and ( abs( float(prevy3)-float(curry) )  <= 1.0)  ):
        
        indexes3.append(index)
        prevx3 = currx
        prevy3 = curry
        
    #if 4th set of coordinates is found, initiate 4th pair, to take care of zig-zagging values    
    elif(  (abs( float(prevx4)-float(currx) ) <= 1.0) and ( abs( float(prevy4)-float(curry) )  <= 1.0)  ):
        
        indexes4.append(index)
        prevx4 = currx
        prevy4 = curry
        
    #if a 5th set of coordinates is introduced, initiate 5th pair  
    elif(  (abs( float(prevx5)-float(currx) ) <= 1.0) and ( abs( float(prevy5)-float(curry) )  <= 1.0)  ):
        
        indexes5.append(index)
        prevx5 = currx
        prevy5 = curry
        
    # if the 6th set coordinates are introduced, initiate 6th pair
    elif( ( abs( float(prevx6) - float(currx) ) <= 1.0 ) and ( abs( float(prevy6) - float(curry) ) <= 1.0 ) ):
        
        indexes6.append(index)
        prevx6 = currx
        prevy6 = curry
        
    #if 7th set of coordinates is found, initiate 7th pair, to take care of zig-zagging values    
    elif(  (abs( float(prevx7)-float(currx) ) <= 1.0) and ( abs( float(prevy7)-float(curry) )  <= 1.0)  ):
        
        indexes7.append(index)
        prevx7 = currx
        prevy7 = curry
        
    #if a 8th set of coordinates is introduced, initiate 8th pair  
    elif(  (abs( float(prevx8)-float(currx) ) <= 1.0) and ( abs( float(prevy8)-float(curry) )  <= 1.0)  ):
        
        indexes8.append(index)
        prevx8 = currx
        prevy8 = curry
        
    #if 9th set of coordinates is found, initiate 9th pair, to take care of zig-zagging values    
    elif(  (abs( float(prevx9)-float(currx) ) <= 1.0) and ( abs( float(prevy9)-float(curry) )  <= 1.0)  ):
        
        indexes9.append(index)
        prevx9 = currx
        prevy9 = curry
        
    #if a 10th set of coordinates is introduced, initiate 10th pair  
    elif(  (abs( float(prevx10)-float(currx) ) <= 1.0) and ( abs( float(prevy10)-float(curry) )  <= 1.0)  ):
        
        indexes10.append(index)
        prevx10 = currx
        prevy10 = curry
        
    #if a 11th set of coordinates is introduced, initiate 11th pair  
    elif(  (abs( float(prevx11)-float(currx) ) <= 1.0) and ( abs( float(prevy11)-float(curry) )  <= 1.0)  ):
        
        indexes11.append(index)
        prevx11 = currx
        prevy11 = curry
        
    #otherwise print start and end locations and form event from indexes
    # If bool is True it makes an event from the prev1 pair coordinates, then makes prev1 pair take new, unencountered pair
    else:
        if(c.clk == 0):
            #change p1

            #print("Start index = %d, location = %s" %(startIdx1, litter.loc[startIdx1,'Location']))
            #print("End index = %d, location = %s" %(endIdx1-1, litter.loc[endIdx1-1,'Location']))
            
            formEvent(indexes1)
            print("p1: " + str(indexes1))
            indexes1 = []
            indexes1.append(index)
            prevx1 = currx
            prevy1 = curry
            c.tick()
            
    #       
        elif(c.clk == 1):
            
            formEvent(indexes2)
            print("p2: " + str(indexes2))
            indexes2 = []
            indexes2.append(index)
            prevx2 = currx
            prevy2 = curry
            c.tick()
            
        elif(c.clk == 2):
            
            formEvent(indexes3)
            print("p3: " + str(indexes3))
            indexes3 = []
            indexes3.append(index)
            prevx3 = currx
            prevy3 = curry
            c.tick()
            
        elif(c.clk == 3):
            
            formEvent(indexes4)
            print("p4: " + str(indexes4))
            indexes4 = []
            indexes4.append(index)
            prevx4 = currx
            prevy4 = curry
            c.tick()
            
        elif(c.clk == 4):
            
            formEvent(indexes5)
            print("p5: " + str(indexes5))
            indexes5 = []
            indexes5.append(index)
            prevx5 = currx
            prevy5 = curry
            c.tick()
            
        if(c.clk == 5):
            
            formEvent(indexes6)
            print("p6: " + str(indexes6))
            indexes6 = []
            indexes6.append(index)
            prevx6 = currx
            prevy6 = curry
            c.tick()
            
    #       
        elif(c.clk == 6):
            
            formEvent(indexes7)
            print("p7: " + str(indexes7))
            indexes7 = []
            indexes7.append(index)
            prevx7 = currx
            prevy7 = curry
            c.tick()
            
        elif(c.clk == 7):
            
            formEvent(indexes8)
            print("p8: " + str(indexes8))
            indexes8 = []
            indexes8.append(index)
            prevx8 = currx
            prevy8 = curry
            c.tick()
            
        elif(c.clk == 8):
            
            formEvent(indexes9)
            print("p9: " + str(indexes9))
            indexes9 = []
            indexes9.append(index)
            prevx9 = currx
            prevy9 = curry
            c.tick()
            
        elif(c.clk == 9):
            
            formEvent(indexes10)
            print("p10: " + str(indexes10))
            indexes10 = []
            indexes10.append(index)
            prevx10 = currx
            prevy10 = curry
            c.tick()
            
        elif(c.clk == 10):
            
            formEvent(indexes11)
            print("p11: " + str(indexes11))
            indexes11 = []
            indexes11.append(index)
            prevx11 = currx
            prevy11 = curry
            c.tick()
            

p2: []
p3: []
p4: []
p5: []
p6: []
p7: []
p8: []
p9: []
p10: []
p11: []
p1: []
Forming Event 0 - 232
p2: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 56, 57, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232]
Forming Event 24 - 24
p3: [24]
Forming Event 25 - 55
p4: [25, 26, 27, 28, 29, 30, 3

KeyboardInterrupt: 

In [85]:
print(litter.loc[25,'Date'])
print(litter.loc[26,'Date'])



2560-03-26 03:01:50+05
2560-03-26 02:48:56+05


In [None]:
#Takes 2 indexes from litterati, then prints their locations, along with the difference in Xs and Ys
# Using reverse geocoding only to test  
# 0 - 23 should be the same
# 24  < Works up to here.
# 25-55

def checkLocations(index1,index2):

    print(getX(litter.loc[index2,'Location']))
    #print(geolocator.reverse(("%s, %s" %(getY(litter.loc[index1,'Location']), getX(litter.loc[index1,'Location'])))))
    #print(geolocator.reverse(("%s, %s" %(getY(litter.loc[index2,'Location']), getX(litter.loc[index2,'Location'])))))
    print("The difference in X: %f, Index: %d" %(abs(float(getX(litter.loc[index1,'Location'])) - float(getX(litter.loc[index2,'Location']))),index2))
    print("The difference in Y: %f" %(abs(float(getY(litter.loc[index1,'Location'])) - float(getY(litter.loc[index2,'Location'])))))
for i in range(23,100,1):
    
    checkLocations(23,i) 
    
    

In [None]:
first = 25
second = 26
checkLocations(first,second)
print("The difference: %f - %f = %f" %(abs(float(getX(litter.loc[first,'Location']))), abs(float(getX(litter.loc[second,'Location']))),abs(abs(float(getX(litter.loc[first,'Location']))) - abs(float(getX(litter.loc[second,'Location']))))))

In [80]:
def formEvent(indexes):
    
    if(indexes == []):
        pass
    else:
        
        print("Forming Event %d - %d" %(indexes[0],indexes[-1]))
        #for i in indexes:
            #Append each index from indexes to an event, count plastic/litter
            #print(i)


In [81]:
formEvent([760,761,762,763,764,765])

Forming Event 760 - 765


In [None]:
unique_labels = litter[["Tags"]].drop_duplicates(subset = ["Tags"])
unique_labels

            

In [None]:
unique_labels_df = litter[["Tags"]].drop_duplicates(subset = ["Tags"])
#unique_labels_df
unique_labels = pd.array(unique_labels_df['Tags'])
#print(type(unique_labels))
unique_labels_df

In [None]:
str = "Bottleasctgt"
if ("bottle" or "can") in str.lower():
    print("hyea")

In [None]:
for index, row in unique_labels.iterrows():
    
    if "plastic" in row["Tags"]: 
        
        if ("bottle" or "can") in row["Tags"].lower():
            #Do what?

In [None]:
value_counts = pd.DataFrame(litter[["Tags"]].value_counts())



In [None]:
tags = []
counts = []
idx = 0
for index, row in value_counts.iterrows():
    
    tags.append(index[0])
    counts.append(row[0])
    
       



In [None]:
percentage_plastic = []
plastic_indexes = []
percentage = 0
for i in range(len(tags)):
   
    if("plastic" in tags[i].lower()):
        
        plastic_indexes.append(i)
        
    elif("foam" in tags[i].lower()):
        
        plastic_indexes.append(i)
        
    elif(("bucket" in tags[i].lower()) or 
         ("crate" in tags[i].lower()) or
         ("lighter" in tags[i].lower()) or
         ("cigarette" in tags[i].lower()) or
         ("straw" in tags[i].lower()) or
         ("crate" in tags[i].lower()))
    
for i in plastic_indexes:
    percentage += counts[i]/1877605 


In [None]:
len(plastic_indexes)

In [None]:
from datetime import datetime
import os

import pandas as pd


raw_data = pd.read_csv('USA OpenData Report Litterati.csv')

LIT_PLASTIC_MAPPINGS = {
    ("CLOTH", "Buoys and floats") : "SUM_Other_BuoysAndFloats",
    ("CLOTH", "Fishing Gear") : "SUM_Other_FishingLineLureRope",
    ("CLOTH", "Fishing Net") : "SUM_Other_Net",
    ("CLOTH", "Fishing Traps") : "SUM_OtherFishing",
    ("CLOTH", "Fishing lures and lines") : "SUM_Other_FishingLineLureRope",
    ("CLOTH", "Lobster Claw Bands") : "SUM_OtherFishing",
    ("CLOTH", "Other Fishing Gear") : "SUM_OtherFishing",
    ("CLOTH", "Plastic Rope or Net") : "SUM_Other_Net",
    ("OTHER ITEMS", "Bait Containers") : "SUM_OtherHardPlastic",
    ("OTHER ITEMS", "Bulk Bags") : "SUM_Soft_Bag",
    ("OTHER ITEMS", "Condoms") : "SUM_HardSoft_PersonalCareProduc",
    ("OTHER ITEMS", "Feminine Hygeine Products") : "SUM_HardSoft_PersonalCareProduc",
    ("OTHER ITEMS", "Foam or Plastic Take Out Containers") : "SUM_Other_PlasticOrFoamFoodContainer",
    ("OTHER ITEMS", "Industrial or Chemical Plastic Packaging") : "SUM_Other_OtherPlasticDebris",
    ("OTHER ITEMS", "Other Plastic") : "SUM_Other_OtherPlasticDebris",
    ("OTHER ITEMS", "Other Plastic Packaging") : "SUM_Other_OtherPlasticDebris",
    ("OTHER ITEMS", "Plastic Film") : "SUM_Soft_WrapperOrLabel",
    ("OTHER ITEMS", "Plastic Food Containers") : "SUM_Other_PlasticOrFoamFoodContainer",
    ("OTHER ITEMS", "Plastic Piping") : "SUM_Other_OtherPlasticDebris",
    ("OTHER ITEMS", "Plastic Sheeting or Tarps") : "SUM_Soft_sheets",
    ("OTHER ITEMS", "Plastic Shipping Waste") : "SUM_Other_OtherPlasticDebris",
    ("OTHER ITEMS", "Plastic String") : "SUM_Other_StringRingRibbon",
    ("OTHER ITEMS", "Styrofoam Packaging") : "SUM_Foam_OtherPlasticDebris",
    ("OTHER ITEMS", "Syringes") : "SUM_Other_OtherPlasticDebris",
    ("PLASTIC", "Aquaculture Gear") : "SUM_Other_OtherPlasticDebris",
    ("PLASTIC", "Balloon and/or String") : "SUM_Other_StringRingRibbon",
    ("PLASTIC", "Cigarettes/Cigars") : "SUM_Soft_CigaretteButts",
    ("PLASTIC", "Fishing Line") : "SUM_Other_FishingLineLureRope",
    ("PLASTIC", "Foam Fragment") : "SUM_Foam_OtherPlasticDebris",
    ("PLASTIC", "Foam or Plastic Cups or Plates") : "SUM_PlasticOrFoamPlatesBowlsCup",
    ("PLASTIC", "Other Plastic Jugs") : "SUM_Hard_OtherPlasticBottle",
    ("PLASTIC", "Other Rubber Items") : "SUM_Other_OtherPlasticDebris",
    ("PLASTIC", "Personal Care Products") : "SUM_HardSoft_PersonalCareProduc",
    ("PLASTIC", "Plastic Bags") : "SUM_Soft_Bag",
    ("PLASTIC", "Plastic Bottle") : "SUM_Hard_PlasticBeverageBottle",
    ("PLASTIC", "Plastic Caps or Lids") : "SUM_HardOrSoft_PlasticBottleCap",
    ("PLASTIC", "Plastic Fiber") : "SUM_Other_OtherPlasticDebris",
    ("PLASTIC", "Plastic Food Wrappers") : "SUM_Soft_WrapperOrLabel",
    ("PLASTIC", "Plastic Pellet") : "SUM_Other_OtherPlasticDebris",
    ("PLASTIC", "Plastic Utensils") : "SUM_PlasticOrFoamPlatesBowlsCup",
    ("PLASTIC", "Plastic or Foam Fragments") : "SUM_Other_OtherPlasticDebris",
    ("PLASTIC", "Rubber Bands") : "SUM_Other_StrapsTiesBands",
    ("PLASTIC", "Rubber Fragments") : "SUM_Other_OtherPlasticDebris",
    ("PLASTIC", "Rubber Gloves") : "SUM_Other_OtherPlasticDebris",
    ("PLASTIC", "Six-pack rings") : "SUM_Soft_WrapperOrLabel",
    ("PLASTIC", "Strapping Bands") : "SUM_Other_StrapsTiesBands",
    ("PLASTIC", "Straws") : "SUM_Soft_Straw",
    ("PLASTIC", "Tobacco Packaging or Lighters") : "SUM_Hard_Lighter",
    ("PLASTIC", "Toys") : "SUM_Other_OtherPlasticDebris",
    ("RUBBER", "Flip-flops") : "SUM_Other_OtherPlasticDebris",
    ("RUBBER", "Tires") : "SUM_Other_OtherPlasticDebris"
}



In [None]:
transformed_data = []

for item in raw_data.itertuples():
   
    print(item)
    #LIT_PLASTIC_MAPPINGS[(item.tags, item.itemname)]
    

In [None]:
LIT_PLASTIC_MAPPINGS[("CLOTH", "Buoys and floats")]

In [None]:


final_data = pd.DataFrame.from_records(transformed_data)
final_data.to_csv(
    os.path.join(PROJECT_DIR, "data", "processed" , "marine-debris-tracker__ec2020-format.csv"), 
    index=False
    )