Last modified: November 25, 2024

[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/iris-lew/projects/blob/main/project_005_IdentifyingBadData/identifying_bad_datasets_supplementary.ipynb)

This is a supplementary notebook. It is only supposed to generate the "urban" column found in the main notebook. I decided to separate this part because I am unable to run all the points on Google Colab without timing out or failing to execute, hence I split it up into chunks. As those chunks were taking a lot of space in the main notebook, I decided to place them here. The three primary outputs of this notebook are:


*   df_train_deduped_urban.csv
*   df_validation_deduped_urban.csv
*   df_test_deduped_urban.csv

The main notebook will just be reading the outputs of this notebook in.

# Importing the packages, functions, and data (with data cleaning)

In [1]:
### only used for Google Colab
import os
from google.colab import drive
drive.mount('/content/gdrive', force_remount=True)
%cd /content/gdrive/MyDrive/

Mounted at /content/gdrive
/content/gdrive/MyDrive


In [2]:
### Downloaded and extracted Kaggle images into a directory. For this demo, I am using data
%cd data

/content/gdrive/MyDrive/data


In [None]:
import numpy as np
import pandas as pd

from matplotlib import pyplot as plt
from matplotlib import image as mpimg
from PIL import Image, ImageFile
# from PIL import Image, ImageEnhance

import folium
from IPython.display import HTML
from folium.plugins import HeatMap

In [None]:
# ### It'll take forever to run this code due to the number of records.
# ### I ran the code once, and then saved the outputs in a CSV file.
# ### Uncomment to run the code.
# ### needs dbf for records and shp just for polygon. only need polygon

# https://www.oecd.org/cfe/regionaldevelopment/functionalurbanareasbycountry.htm ### canada

# # https://stackoverflow.com/questions/48897635/given-a-geographical-coordinate-in-u-s-how-to-find-out-if-it-is-in-urban-or-ru

import shapefile
from shapely.geometry import Point # Point class
from shapely.geometry import shape # shape() is a function to convert geo objects through the interface

def is_urban(pt, allshapes):
    result = False
    for i in range(len(allshapes)):
        boundary = allshapes[i] # get a boundary polygon
        #name = all_records[i][3] + ', ' + all_records[i][4] # get the second field of the corresponding record
        if Point(pt).within(shape(boundary)): # make a point and see if it's in the polygon
            result = True
    return result

# pt = (-97.759615,30.258773) # an x,y tuple latitude, longitude
shp = shapefile.Reader('./Canada_core.shp') #open the shapefile
all_shapes_1 = shp.shapes() # get all the polygons
# all_records = shp.records()

shp2 = shapefile.Reader('./Canada.shp') #open the shapefile
all_shapes_2 = shp2.shapes() # get all the polygons

# df_validation

In [None]:
# validation
lists = []

status = ["nowildfire","wildfire"]
for state in status:
  for filename in os.listdir("./valid/"+state):
    temp = []
    temp.append('./valid/'+state+'/'+filename)
    temp.append(filename[:filename.index(',')])
    temp.append(filename[filename.index(',')+1:filename.index('.jpg')])
    temp.append(state)
    lists.append(temp)

df_validation = pd.DataFrame(lists, columns=["file_name","latitude","longitude","wildfire_status"])
df_validation

Unnamed: 0,file_name,latitude,longitude,wildfire_status
0,"./valid/nowildfire/-73.910371,45.429903.jpg",-73.910371,45.429903,nowildfire
1,"./valid/nowildfire/-73.906629,45.631967.jpg",-73.906629,45.631967,nowildfire
2,"./valid/nowildfire/-75.566836,45.460096.jpg",-75.566836,45.460096,nowildfire
3,"./valid/nowildfire/-75.568926,45.437194.jpg",-75.568926,45.437194,nowildfire
4,"./valid/nowildfire/-73.918042,45.568354.jpg",-73.918042,45.568354,nowildfire
...,...,...,...,...
6295,"./valid/wildfire/-70.05913,48.03148.jpg",-70.05913,48.03148,wildfire
6296,"./valid/wildfire/-70.12939,49.35492.jpg",-70.12939,49.35492,wildfire
6297,"./valid/wildfire/-70.0567,52.6694.jpg",-70.0567,52.6694,wildfire
6298,"./valid/wildfire/-70.03096,46.61391.jpg",-70.03096,46.61391,wildfire


In [None]:
# do the same things to the df_validation dataframe and the df_test dataframe.
df_validation["latitude_numeric"]=pd.to_numeric(df_validation["latitude"])
df_validation["longitude_numeric"] = np.where(df_validation["longitude"].str.endswith("(1)"),
                                              df_validation["longitude"].str.replace(" (1)",""),
                                              df_validation["longitude"])
df_validation["longitude_numeric"] = pd.to_numeric(df_validation["longitude_numeric"])

df_validation_deduped = df_validation.drop_duplicates(subset=["wildfire_status","latitude_numeric","longitude_numeric"], keep="last")

In [None]:
# remember, latitude and longitude are reversed in dataset.
df_validation_deduped['urban_core'] = df_validation_deduped.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_1),axis=1)
df_validation_deduped

Unnamed: 0,file_name,latitude,longitude,wildfire_status,latitude_numeric,longitude_numeric,urban_core
0,"./valid/nowildfire/-73.910371,45.429903.jpg",-73.910371,45.429903,nowildfire,-73.910371,45.429903,False
1,"./valid/nowildfire/-73.906629,45.631967.jpg",-73.906629,45.631967,nowildfire,-73.906629,45.631967,True
2,"./valid/nowildfire/-75.566836,45.460096.jpg",-75.566836,45.460096,nowildfire,-75.566836,45.460096,True
3,"./valid/nowildfire/-75.568926,45.437194.jpg",-75.568926,45.437194,nowildfire,-75.568926,45.437194,True
4,"./valid/nowildfire/-73.918042,45.568354.jpg",-73.918042,45.568354,nowildfire,-73.918042,45.568354,True
...,...,...,...,...,...,...,...
6295,"./valid/wildfire/-70.05913,48.03148.jpg",-70.05913,48.03148,wildfire,-70.059130,48.031480,False
6296,"./valid/wildfire/-70.12939,49.35492.jpg",-70.12939,49.35492,wildfire,-70.129390,49.354920,False
6297,"./valid/wildfire/-70.0567,52.6694.jpg",-70.0567,52.6694,wildfire,-70.056700,52.669400,False
6298,"./valid/wildfire/-70.03096,46.61391.jpg",-70.03096,46.61391,wildfire,-70.030960,46.613910,False


In [None]:
df_validation_deduped[["wildfire_status","urban_core"]].value_counts().sort_index()

Unnamed: 0_level_0,Unnamed: 1_level_0,count
wildfire_status,urban_core,Unnamed: 2_level_1
nowildfire,False,311
nowildfire,True,2509
wildfire,False,3440
wildfire,True,40


This takes a lot more time, so I'm going to do this by ~1000 and then stacking them on top of each other.

In [None]:
df_validation_1000 = df_validation_deduped.iloc[:1000,]
df_validation_2000 = df_validation_deduped.iloc[1000:2000,]
df_validation_3000 = df_validation_deduped.iloc[2000:3000,]
df_validation_4000 = df_validation_deduped.iloc[3000:4000,]
df_validation_5000 = df_validation_deduped.iloc[4000:5000,]
df_validation_6000 = df_validation_deduped.iloc[5000:,]
df_validation_6000

Unnamed: 0,file_name,latitude,longitude,wildfire_status,latitude_numeric,longitude_numeric,urban_core
5000,"./valid/wildfire/-71.00893,46.44994.jpg",-71.00893,46.44994,wildfire,-71.00893,46.44994,False
5001,"./valid/wildfire/-70.98919,48.28597.jpg",-70.98919,48.28597,wildfire,-70.98919,48.28597,False
5002,"./valid/wildfire/-70.98057,49.60761.jpg",-70.98057,49.60761,wildfire,-70.98057,49.60761,False
5003,"./valid/wildfire/-71.01678,48.53507.jpg",-71.01678,48.53507,wildfire,-71.01678,48.53507,False
5004,"./valid/wildfire/-71.10295,48.44455.jpg",-71.10295,48.44455,wildfire,-71.10295,48.44455,False
...,...,...,...,...,...,...,...
6295,"./valid/wildfire/-70.05913,48.03148.jpg",-70.05913,48.03148,wildfire,-70.05913,48.03148,False
6296,"./valid/wildfire/-70.12939,49.35492.jpg",-70.12939,49.35492,wildfire,-70.12939,49.35492,False
6297,"./valid/wildfire/-70.0567,52.6694.jpg",-70.0567,52.6694,wildfire,-70.05670,52.66940,False
6298,"./valid/wildfire/-70.03096,46.61391.jpg",-70.03096,46.61391,wildfire,-70.03096,46.61391,False


In [None]:
df_validation_1000['urban_commute'] = df_validation_1000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_2),axis=1)
df_validation_1000[["wildfire_status",'urban_commute']].value_counts().sort_index()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_validation_1000['urban_commute'] = df_validation_1000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_2),axis=1)


Unnamed: 0_level_0,Unnamed: 1_level_0,count
wildfire_status,urban_commute,Unnamed: 2_level_1
nowildfire,True,969
nowildfire,False,31


In [None]:
df_validation_2000['urban_commute'] = df_validation_2000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_2),axis=1)
df_validation_2000[["wildfire_status",'urban_commute']].value_counts().sort_index()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_validation_2000['urban_commute'] = df_validation_2000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_2),axis=1)


Unnamed: 0_level_0,Unnamed: 1_level_0,count
wildfire_status,urban_commute,Unnamed: 2_level_1
nowildfire,False,111
nowildfire,True,889


In [None]:
df_validation_3000['urban_commute'] = df_validation_3000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_2),axis=1)
df_validation_3000[["wildfire_status",'urban_commute']].value_counts().sort_index()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_validation_3000['urban_commute'] = df_validation_3000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_2),axis=1)


Unnamed: 0_level_0,Unnamed: 1_level_0,count
wildfire_status,urban_commute,Unnamed: 2_level_1
nowildfire,True,820
wildfire,False,157
wildfire,True,23


In [None]:
df_validation_4000['urban_commute'] = df_validation_4000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_2),axis=1)
df_validation_4000[["wildfire_status",'urban_commute']].value_counts().sort_index()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_validation_4000['urban_commute'] = df_validation_4000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_2),axis=1)


Unnamed: 0_level_0,Unnamed: 1_level_0,count
wildfire_status,urban_commute,Unnamed: 2_level_1
wildfire,False,953
wildfire,True,47


In [None]:
df_validation_5000['urban_commute'] = df_validation_5000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_2),axis=1)
df_validation_5000[["wildfire_status",'urban_commute']].value_counts().sort_index()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_validation_5000['urban_commute'] = df_validation_5000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_2),axis=1)


Unnamed: 0_level_0,Unnamed: 1_level_0,count
wildfire_status,urban_commute,Unnamed: 2_level_1
wildfire,False,776
wildfire,True,224


In [None]:
df_validation_6000['urban_commute'] = df_validation_6000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_2),axis=1)
df_validation_6000[["wildfire_status",'urban_commute']].value_counts().sort_index()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_validation_6000['urban_commute'] = df_validation_6000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_2),axis=1)


Unnamed: 0_level_0,Unnamed: 1_level_0,count
wildfire_status,urban_commute,Unnamed: 2_level_1
wildfire,False,1080
wildfire,True,220


In [None]:
df_validation_deduped = pd.concat([df_validation_1000,df_validation_2000,df_validation_3000,
                                df_validation_4000, df_validation_5000,df_validation_6000])

In [None]:
df_validation_deduped["urban"] = df_validation_deduped.apply(lambda x: True if x['urban_core'] | x['urban_commute'] else False,axis=1)
df_validation_deduped[["wildfire_status","urban_core","urban_commute","urban"]].value_counts().sort_index()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,count
wildfire_status,urban_core,urban_commute,urban,Unnamed: 4_level_1
nowildfire,False,False,False,142
nowildfire,False,True,True,169
nowildfire,True,True,True,2509
wildfire,False,False,False,2966
wildfire,False,True,True,474
wildfire,True,True,True,40


In [None]:
df_validation_deduped_urban = df_validation_deduped
df_validation_deduped_urban
df_validation_deduped_urban.to_csv("df_validation_deduped_urban.csv", index=False)

In [None]:
# Switched the latitude and longitude.
# validating that this is correctly capturing the urban locations on the map
df_validation_deduped_urban_only = df_validation_deduped_urban[df_validation_deduped_urban["urban"]==True]

lat = df_validation_deduped_urban_only["longitude_numeric"]
lon = df_validation_deduped_urban_only["latitude_numeric"]

map = folium.Map(location=[lat.mean(),lon.mean()],
                 zoom_start=5,
                 control_scale=True)
HeatMap(list(zip(lat, lon))).add_to(map)

map

In [None]:
# Switched the latitude and longitude.
# Validating that this is only showing the non-urban locations on the map
df_validation_deduped_nourban_only = df_validation_deduped_urban[df_validation_deduped_urban["urban"]==False]

lat = df_validation_deduped_nourban_only["longitude_numeric"]
lon = df_validation_deduped_nourban_only["latitude_numeric"]

map = folium.Map(location=[lat.mean(),lon.mean()],
                 zoom_start=5,
                 control_scale=True)
HeatMap(list(zip(lat, lon))).add_to(map)

map

# df_train

In [None]:
# train
lists = []

status = ["nowildfire","wildfire"]

for state in status:
  for filename in os.listdir("./train/"+state):
    temp = []
    temp.append('./train/'+state+'/'+filename)
    temp.append(filename[:filename.index(',')])
    temp.append(filename[filename.index(',')+1:filename.index('.jpg')])
    temp.append(state)
    lists.append(temp)

df_train = pd.DataFrame(lists, columns=["file_name","latitude","longitude","wildfire_status"])
df_train

Unnamed: 0,file_name,latitude,longitude,wildfire_status
0,"./train/nowildfire/-79.535187,43.882733.jpg",-79.535187,43.882733,nowildfire
1,"./train/nowildfire/-79.533182,43.784243.jpg",-79.533182,43.784243,nowildfire
2,"./train/nowildfire/-79.534956,43.647635.jpg",-79.534956,43.647635,nowildfire
3,"./train/nowildfire/-79.536092,43.633224.jpg",-79.536092,43.633224,nowildfire
4,"./train/nowildfire/-79.531656,43.704536.jpg",-79.531656,43.704536,nowildfire
...,...,...,...,...
30265,"./train/wildfire/-66.73731,50.35672.jpg",-66.73731,50.35672,wildfire
30266,"./train/wildfire/-66.75804,48.01979.jpg",-66.75804,48.01979,wildfire
30267,"./train/wildfire/-66.7444,50.05576.jpg",-66.7444,50.05576,wildfire
30268,"./train/wildfire/-66.72849,48.01978.jpg",-66.72849,48.01978,wildfire


In [None]:
# data cleaning
df_train["latitude_numeric"]=pd.to_numeric(df_train["latitude"])
df_train["longitude_numeric"] = np.where(df_train["longitude"].str.endswith("(1)"),
                                         df_train["longitude"].str.replace(" (1)",""),
                                         df_train["longitude"])
df_train["longitude_numeric"] = pd.to_numeric(df_train["longitude_numeric"])

df_train_deduped = df_train.drop_duplicates(subset=["wildfire_status","latitude_numeric","longitude_numeric"], keep="last")

In [None]:
df_train_deduped_1000 = df_train_deduped.iloc[:1000,]
df_train_deduped_2000 = df_train_deduped.iloc[1000:2000,]
df_train_deduped_3000 = df_train_deduped.iloc[2000:3000,]
df_train_deduped_4000 = df_train_deduped.iloc[3000:4000,]
df_train_deduped_5000 = df_train_deduped.iloc[4000:5000,]
df_train_deduped_6000 = df_train_deduped.iloc[5000:6000,]
df_train_deduped_7000 = df_train_deduped.iloc[6000:7000,]
df_train_deduped_8000 = df_train_deduped.iloc[7000:8000,]
df_train_deduped_9000 = df_train_deduped.iloc[8000:9000,]
df_train_deduped_10000 = df_train_deduped.iloc[9000:10000,]
df_train_deduped_11000 = df_train_deduped.iloc[10000:11000,]
df_train_deduped_12000 = df_train_deduped.iloc[11000:12000,]
df_train_deduped_13000 = df_train_deduped.iloc[12000:13000,]
df_train_deduped_14000 = df_train_deduped.iloc[13000:14000,]
df_train_deduped_15000 = df_train_deduped.iloc[14000:15000,]
df_train_deduped_16000 = df_train_deduped.iloc[15000:16000,]
df_train_deduped_17000 = df_train_deduped.iloc[16000:17000,]
df_train_deduped_18000 = df_train_deduped.iloc[17000:18000,]
df_train_deduped_19000 = df_train_deduped.iloc[18000:19000,]
df_train_deduped_20000 = df_train_deduped.iloc[19000:20000,]
df_train_deduped_21000 = df_train_deduped.iloc[20000:21000,]
df_train_deduped_22000 = df_train_deduped.iloc[21000:22000,]
df_train_deduped_23000 = df_train_deduped.iloc[22000:23000,]
df_train_deduped_24000 = df_train_deduped.iloc[23000:24000,]
df_train_deduped_25000 = df_train_deduped.iloc[24000:25000,]
df_train_deduped_26000 = df_train_deduped.iloc[25000:26000,]
df_train_deduped_27000 = df_train_deduped.iloc[26000:27000,]
df_train_deduped_28000 = df_train_deduped.iloc[27000:28000,]
df_train_deduped_29000 = df_train_deduped.iloc[28000:29000,]
df_train_deduped_30000 = df_train_deduped.iloc[29000:,]
df_train_deduped_30000

Unnamed: 0,file_name,latitude,longitude,wildfire_status,latitude_numeric,longitude_numeric
29000,"./train/wildfire/-68.5573,49.0969.jpg",-68.5573,49.0969,wildfire,-68.55730,49.09690
29001,"./train/wildfire/-68.55238,48.07464.jpg",-68.55238,48.07464,wildfire,-68.55238,48.07464
29002,"./train/wildfire/-68.58333,47.99041.jpg",-68.58333,47.99041,wildfire,-68.58333,47.99041
29003,"./train/wildfire/-68.53291,56.43375.jpg",-68.53291,56.43375,wildfire,-68.53291,56.43375
29004,"./train/wildfire/-68.57638,49.73591.jpg",-68.57638,49.73591,wildfire,-68.57638,49.73591
...,...,...,...,...,...,...
30265,"./train/wildfire/-66.73731,50.35672.jpg",-66.73731,50.35672,wildfire,-66.73731,50.35672
30266,"./train/wildfire/-66.75804,48.01979.jpg",-66.75804,48.01979,wildfire,-66.75804,48.01979
30267,"./train/wildfire/-66.7444,50.05576.jpg",-66.7444,50.05576,wildfire,-66.74440,50.05576
30268,"./train/wildfire/-66.72849,48.01978.jpg",-66.72849,48.01978,wildfire,-66.72849,48.01978


In [None]:
df_train_deduped_1000['urban_core'] = df_train_deduped_1000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_1),axis=1)
df_train_deduped_1000[["wildfire_status",'urban_core']].value_counts().sort_index()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train_deduped_1000['urban_core'] = df_train_deduped_1000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_1),axis=1)


Unnamed: 0_level_0,Unnamed: 1_level_0,count
wildfire_status,urban_core,Unnamed: 2_level_1
nowildfire,False,92
nowildfire,True,908


In [None]:
df_train_deduped_1000['urban_commute'] = df_train_deduped_1000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_2),axis=1)
df_train_deduped_1000[["wildfire_status",'urban_commute']].value_counts().sort_index()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train_deduped_1000['urban_commute'] = df_train_deduped_1000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_2),axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train_deduped_1000["urban"] = df_train_deduped_1000.apply(lambda x: True if x['urban_core'] | x['urban_commute'] else False,axis=1)


In [None]:
df_train_deduped_1000[["wildfire_status",'urban_commute',"urban_core"]].value_counts().sort_index()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,count
wildfire_status,urban_commute,urban_core,Unnamed: 3_level_1
nowildfire,True,False,92
nowildfire,True,True,908


In [None]:
df_train_deduped_2000['urban_core'] = df_train_deduped_2000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_1),axis=1)
df_train_deduped_2000[["wildfire_status",'urban_core']].value_counts().sort_index()
df_train_deduped_3000['urban_core'] = df_train_deduped_3000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_1),axis=1)
df_train_deduped_3000[["wildfire_status",'urban_core']].value_counts().sort_index()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train_deduped_2000['urban_core'] = df_train_deduped_2000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_1),axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train_deduped_3000['urban_core'] = df_train_deduped_3000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_1),axis=1)


Unnamed: 0_level_0,Unnamed: 1_level_0,count
wildfire_status,urban_core,Unnamed: 2_level_1
nowildfire,False,96
nowildfire,True,904


In [None]:
df_train_deduped_2000['urban_commute'] = df_train_deduped_2000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_2),axis=1)
df_train_deduped_2000[["wildfire_status",'urban_commute']].value_counts().sort_index()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train_deduped_2000['urban_commute'] = df_train_deduped_2000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_2),axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train_deduped_2000["urban"] = df_train_deduped_2000.apply(lambda x: True if x['urban_core'] | x['urban_commute'] else False,axis=1)


Unnamed: 0_level_0,Unnamed: 1_level_0,count
wildfire_status,urban_commute,Unnamed: 2_level_1
nowildfire,False,13
nowildfire,True,987


In [None]:
df_train_deduped_3000['urban_commute'] = df_train_deduped_3000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_2),axis=1)
df_train_deduped_3000[["wildfire_status",'urban_commute']].value_counts().sort_index()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train_deduped_3000['urban_commute'] = df_train_deduped_3000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_2),axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train_deduped_3000["urban"] = df_train_deduped_3000.apply(lambda x: True if x['urban_core'] | x['urban_commute'] else False,axis=1)


Unnamed: 0_level_0,Unnamed: 1_level_0,count
wildfire_status,urban_commute,Unnamed: 2_level_1
nowildfire,False,96
nowildfire,True,904


In [None]:
df_train_deduped_urban1 = pd.concat([df_train_deduped_1000,df_train_deduped_2000,df_train_deduped_2000])
df_train_deduped_urban1

Unnamed: 0,file_name,latitude,longitude,wildfire_status,latitude_numeric,longitude_numeric,urban_core,urban_commute,urban
0,"./train/nowildfire/-79.535187,43.882733.jpg",-79.535187,43.882733,nowildfire,-79.535187,43.882733,True,True,True
1,"./train/nowildfire/-79.533182,43.784243.jpg",-79.533182,43.784243,nowildfire,-79.533182,43.784243,True,True,True
2,"./train/nowildfire/-79.534956,43.647635.jpg",-79.534956,43.647635,nowildfire,-79.534956,43.647635,True,True,True
3,"./train/nowildfire/-79.536092,43.633224.jpg",-79.536092,43.633224,nowildfire,-79.536092,43.633224,True,True,True
4,"./train/nowildfire/-79.531656,43.704536.jpg",-79.531656,43.704536,nowildfire,-79.531656,43.704536,True,True,True
...,...,...,...,...,...,...,...,...,...
1995,"./train/nowildfire/-79.534024,43.735478.jpg",-79.534024,43.735478,nowildfire,-79.534024,43.735478,True,True,True
1996,"./train/nowildfire/-79.534617,43.807638.jpg",-79.534617,43.807638,nowildfire,-79.534617,43.807638,True,True,True
1997,"./train/nowildfire/-79.533826,43.745524.jpg",-79.533826,43.745524,nowildfire,-79.533826,43.745524,True,True,True
1998,"./train/nowildfire/-79.532817,43.829241.jpg",-79.532817,43.829241,nowildfire,-79.532817,43.829241,True,True,True


In [None]:
df_train_deduped_urban1[["urban","urban_commute", "urban_core"]].value_counts().sort_index()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,count
urban,urban_commute,urban_core,Unnamed: 3_level_1
False,False,False,26
True,True,False,94
True,True,True,2880


In [None]:
df_train_deduped_urban1.to_csv("df_train_deduped_urban1.csv", index=False)

In [None]:
df_train_deduped_4000['urban_core'] = df_train_deduped_4000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_1),axis=1)
df_train_deduped_4000[["wildfire_status",'urban_core']].value_counts().sort_index()
df_train_deduped_5000['urban_core'] = df_train_deduped_5000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_1),axis=1)
df_train_deduped_5000[["wildfire_status",'urban_core']].value_counts().sort_index()
df_train_deduped_6000['urban_core'] = df_train_deduped_6000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_1),axis=1)
df_train_deduped_6000[["wildfire_status",'urban_core']].value_counts().sort_index()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train_deduped_4000['urban_core'] = df_train_deduped_4000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_1),axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train_deduped_5000['urban_core'] = df_train_deduped_5000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_1),axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pand

Unnamed: 0_level_0,Unnamed: 1_level_0,count
wildfire_status,urban_core,Unnamed: 2_level_1
nowildfire,False,200
nowildfire,True,800


In [None]:
df_train_deduped_4000['urban_commute'] = df_train_deduped_4000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_2),axis=1)
df_train_deduped_4000[["wildfire_status",'urban_commute']].value_counts().sort_index()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train_deduped_4000['urban_commute'] = df_train_deduped_4000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_2),axis=1)


Unnamed: 0_level_0,Unnamed: 1_level_0,count
wildfire_status,urban_commute,Unnamed: 2_level_1
nowildfire,True,1000


In [None]:
df_train_deduped_5000['urban_commute'] = df_train_deduped_5000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_2),axis=1)
df_train_deduped_5000[["wildfire_status",'urban_commute']].value_counts().sort_index()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train_deduped_5000['urban_commute'] = df_train_deduped_5000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_2),axis=1)


Unnamed: 0_level_0,Unnamed: 1_level_0,count
wildfire_status,urban_commute,Unnamed: 2_level_1
nowildfire,True,1000


In [None]:
df_train_deduped_6000['urban_commute'] = df_train_deduped_6000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_2),axis=1)
df_train_deduped_6000[["wildfire_status",'urban_commute']].value_counts().sort_index()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train_deduped_6000['urban_commute'] = df_train_deduped_6000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_2),axis=1)


Unnamed: 0_level_0,Unnamed: 1_level_0,count
wildfire_status,urban_commute,Unnamed: 2_level_1
nowildfire,False,171
nowildfire,True,829


In [None]:
df_train_deduped_urban2 = pd.concat([df_train_deduped_4000,df_train_deduped_5000,df_train_deduped_6000])
df_train_deduped_urban2.to_csv("df_train_deduped_urban2.csv", index=False)

In [None]:
df_train_deduped_7000['urban_core'] = df_train_deduped_7000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_1),axis=1)
df_train_deduped_7000[["wildfire_status",'urban_core']].value_counts().sort_index()
df_train_deduped_8000['urban_core'] = df_train_deduped_8000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_1),axis=1)
df_train_deduped_8000[["wildfire_status",'urban_core']].value_counts().sort_index()
df_train_deduped_9000['urban_core'] = df_train_deduped_9000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_1),axis=1)
df_train_deduped_9000[["wildfire_status",'urban_core']].value_counts().sort_index()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train_deduped_7000['urban_core'] = df_train_deduped_7000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_1),axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train_deduped_8000['urban_core'] = df_train_deduped_8000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_1),axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pand

Unnamed: 0_level_0,Unnamed: 1_level_0,count
wildfire_status,urban_core,Unnamed: 2_level_1
nowildfire,False,117
nowildfire,True,883


In [None]:
df_train_deduped_7000['urban_commute'] = df_train_deduped_7000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_2),axis=1)
df_train_deduped_7000[["wildfire_status",'urban_commute']].value_counts().sort_index()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train_deduped_7000['urban_commute'] = df_train_deduped_7000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_2),axis=1)


Unnamed: 0_level_0,Unnamed: 1_level_0,count
wildfire_status,urban_commute,Unnamed: 2_level_1
nowildfire,False,141
nowildfire,True,859


In [None]:
df_train_deduped_8000['urban_commute'] = df_train_deduped_8000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_2),axis=1)
df_train_deduped_8000[["wildfire_status",'urban_commute']].value_counts().sort_index()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train_deduped_8000['urban_commute'] = df_train_deduped_8000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_2),axis=1)


Unnamed: 0_level_0,Unnamed: 1_level_0,count
wildfire_status,urban_commute,Unnamed: 2_level_1
nowildfire,False,230
nowildfire,True,770


In [None]:
df_train_deduped_9000['urban_commute'] = df_train_deduped_9000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_2),axis=1)
df_train_deduped_9000[["wildfire_status",'urban_commute']].value_counts().sort_index()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train_deduped_9000['urban_commute'] = df_train_deduped_9000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_2),axis=1)


Unnamed: 0_level_0,Unnamed: 1_level_0,count
wildfire_status,urban_commute,Unnamed: 2_level_1
nowildfire,False,34
nowildfire,True,966


In [None]:
df_train_deduped_urban3 = pd.concat([df_train_deduped_7000,df_train_deduped_8000,df_train_deduped_9000])
df_train_deduped_urban3.to_csv("df_train_deduped_urban3.csv", index=False)

In [None]:
df_train_deduped_10000['urban_core'] = df_train_deduped_10000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_1),axis=1)
df_train_deduped_10000[["wildfire_status",'urban_core']].value_counts().sort_index()
df_train_deduped_11000['urban_core'] = df_train_deduped_11000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_1),axis=1)
df_train_deduped_11000[["wildfire_status",'urban_core']].value_counts().sort_index()
df_train_deduped_12000['urban_core'] = df_train_deduped_12000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_1),axis=1)
df_train_deduped_12000[["wildfire_status",'urban_core']].value_counts().sort_index()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train_deduped_10000['urban_core'] = df_train_deduped_10000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_1),axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train_deduped_11000['urban_core'] = df_train_deduped_11000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_1),axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://

Unnamed: 0_level_0,Unnamed: 1_level_0,count
wildfire_status,urban_core,Unnamed: 2_level_1
nowildfire,False,57
nowildfire,True,943


In [None]:
df_train_deduped_10000['urban_commute'] = df_train_deduped_10000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_2),axis=1)
df_train_deduped_10000[["wildfire_status",'urban_commute']].value_counts().sort_index()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train_deduped_10000['urban_commute'] = df_train_deduped_10000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_2),axis=1)


Unnamed: 0_level_0,Unnamed: 1_level_0,count
wildfire_status,urban_commute,Unnamed: 2_level_1
nowildfire,True,1000


In [None]:
df_train_deduped_11000['urban_commute'] = df_train_deduped_11000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_2),axis=1)
df_train_deduped_11000[["wildfire_status",'urban_commute']].value_counts().sort_index()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train_deduped_11000['urban_commute'] = df_train_deduped_11000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_2),axis=1)


Unnamed: 0_level_0,Unnamed: 1_level_0,count
wildfire_status,urban_commute,Unnamed: 2_level_1
nowildfire,True,1000


In [None]:
df_train_deduped_12000['urban_commute'] = df_train_deduped_12000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_2),axis=1)
df_train_deduped_12000[["wildfire_status",'urban_commute']].value_counts().sort_index()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train_deduped_12000['urban_commute'] = df_train_deduped_12000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_2),axis=1)


Unnamed: 0_level_0,Unnamed: 1_level_0,count
wildfire_status,urban_commute,Unnamed: 2_level_1
nowildfire,True,1000


In [None]:
df_train_deduped_urban4 = pd.concat([df_train_deduped_10000,df_train_deduped_11000,df_train_deduped_12000])
df_train_deduped_urban4.to_csv("df_train_deduped_urban4.csv", index=False)

In [None]:
df_train_deduped_13000['urban_core'] = df_train_deduped_13000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_1),axis=1)
df_train_deduped_14000['urban_core'] = df_train_deduped_14000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_1),axis=1)
df_train_deduped_15000['urban_core'] = df_train_deduped_15000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_1),axis=1)
df_train_deduped_15000[["wildfire_status",'urban_core']].value_counts().sort_index()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train_deduped_13000['urban_core'] = df_train_deduped_13000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_1),axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train_deduped_14000['urban_core'] = df_train_deduped_14000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_1),axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://

Unnamed: 0_level_0,Unnamed: 1_level_0,count
wildfire_status,urban_core,Unnamed: 2_level_1
nowildfire,True,500
wildfire,False,500


In [None]:
df_train_deduped_13000['urban_commute'] = df_train_deduped_13000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_2),axis=1)
df_train_deduped_13000[["wildfire_status",'urban_commute']].value_counts().sort_index()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train_deduped_13000['urban_commute'] = df_train_deduped_13000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_2),axis=1)


Unnamed: 0_level_0,Unnamed: 1_level_0,count
wildfire_status,urban_commute,Unnamed: 2_level_1
nowildfire,True,1000


In [None]:
df_train_deduped_14000['urban_commute'] = df_train_deduped_14000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_2),axis=1)
df_train_deduped_14000[["wildfire_status",'urban_commute']].value_counts().sort_index()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train_deduped_14000['urban_commute'] = df_train_deduped_14000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_2),axis=1)


Unnamed: 0_level_0,Unnamed: 1_level_0,count
wildfire_status,urban_commute,Unnamed: 2_level_1
nowildfire,True,1000


In [None]:
df_train_deduped_15000['urban_commute'] = df_train_deduped_15000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_2),axis=1)
df_train_deduped_15000[["wildfire_status",'urban_commute']].value_counts().sort_index()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train_deduped_15000['urban_commute'] = df_train_deduped_15000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_2),axis=1)


Unnamed: 0_level_0,Unnamed: 1_level_0,count
wildfire_status,urban_commute,Unnamed: 2_level_1
nowildfire,True,500
wildfire,False,500


In [None]:
df_train_deduped_urban5 = pd.concat([df_train_deduped_13000,df_train_deduped_14000,df_train_deduped_15000])
df_train_deduped_urban5.to_csv("df_train_deduped_urban5.csv", index=False)

In [None]:
df_train_deduped_16000['urban_core'] = df_train_deduped_16000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_1),axis=1)
df_train_deduped_17000['urban_core'] = df_train_deduped_17000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_1),axis=1)
df_train_deduped_18000['urban_core'] = df_train_deduped_18000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_1),axis=1)
df_train_deduped_18000[["wildfire_status",'urban_core']].value_counts().sort_index()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train_deduped_16000['urban_core'] = df_train_deduped_16000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_1),axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train_deduped_17000['urban_core'] = df_train_deduped_17000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_1),axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://

Unnamed: 0_level_0,Unnamed: 1_level_0,count
wildfire_status,urban_core,Unnamed: 2_level_1
wildfire,False,1000


In [None]:
df_train_deduped_16000['urban_commute'] = df_train_deduped_16000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_2),axis=1)
df_train_deduped_16000[["wildfire_status",'urban_commute']].value_counts().sort_index()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train_deduped_16000['urban_commute'] = df_train_deduped_16000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_2),axis=1)


Unnamed: 0_level_0,Unnamed: 1_level_0,count
wildfire_status,urban_commute,Unnamed: 2_level_1
wildfire,False,1000


In [None]:
df_train_deduped_17000['urban_commute'] = df_train_deduped_17000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_2),axis=1)
df_train_deduped_17000[["wildfire_status",'urban_commute']].value_counts().sort_index()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train_deduped_17000['urban_commute'] = df_train_deduped_17000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_2),axis=1)


Unnamed: 0_level_0,Unnamed: 1_level_0,count
wildfire_status,urban_commute,Unnamed: 2_level_1
wildfire,False,1000


In [None]:
df_train_deduped_18000['urban_commute'] = df_train_deduped_18000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_2),axis=1)
df_train_deduped_18000[["wildfire_status",'urban_commute']].value_counts().sort_index()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train_deduped_18000['urban_commute'] = df_train_deduped_18000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_2),axis=1)


Unnamed: 0_level_0,Unnamed: 1_level_0,count
wildfire_status,urban_commute,Unnamed: 2_level_1
wildfire,False,954
wildfire,True,46


In [None]:
df_train_deduped_urban6 = pd.concat([df_train_deduped_16000,df_train_deduped_17000,df_train_deduped_18000])
df_train_deduped_urban6.to_csv("df_train_deduped_urban6.csv", index=False)

In [None]:
df_train_deduped_19000['urban_core'] = df_train_deduped_19000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_1),axis=1)
df_train_deduped_20000['urban_core'] = df_train_deduped_20000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_1),axis=1)
df_train_deduped_21000['urban_core'] = df_train_deduped_21000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_1),axis=1)
df_train_deduped_21000[["wildfire_status",'urban_core']].value_counts().sort_index()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train_deduped_19000['urban_core'] = df_train_deduped_19000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_1),axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train_deduped_20000['urban_core'] = df_train_deduped_20000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_1),axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://

Unnamed: 0_level_0,Unnamed: 1_level_0,count
wildfire_status,urban_core,Unnamed: 2_level_1
wildfire,False,980
wildfire,True,20


In [None]:
df_train_deduped_19000['urban_commute'] = df_train_deduped_19000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_2),axis=1)
df_train_deduped_19000[["wildfire_status",'urban_commute']].value_counts().sort_index()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train_deduped_19000['urban_commute'] = df_train_deduped_19000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_2),axis=1)


Unnamed: 0_level_0,Unnamed: 1_level_0,count
wildfire_status,urban_commute,Unnamed: 2_level_1
wildfire,False,869
wildfire,True,131


In [None]:
df_train_deduped_20000['urban_commute'] = df_train_deduped_20000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_2),axis=1)
df_train_deduped_20000[["wildfire_status",'urban_commute']].value_counts().sort_index()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train_deduped_20000['urban_commute'] = df_train_deduped_20000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_2),axis=1)


Unnamed: 0_level_0,Unnamed: 1_level_0,count
wildfire_status,urban_commute,Unnamed: 2_level_1
wildfire,False,929
wildfire,True,71


In [None]:
df_train_deduped_21000['urban_commute'] = df_train_deduped_21000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_2),axis=1)
df_train_deduped_21000[["wildfire_status",'urban_commute']].value_counts().sort_index()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train_deduped_21000['urban_commute'] = df_train_deduped_21000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_2),axis=1)


Unnamed: 0_level_0,Unnamed: 1_level_0,count
wildfire_status,urban_commute,Unnamed: 2_level_1
wildfire,False,714
wildfire,True,286


In [None]:
df_train_deduped_urban7 = pd.concat([df_train_deduped_19000,df_train_deduped_20000,df_train_deduped_21000])
df_train_deduped_urban7.to_csv("df_train_deduped_urban7.csv", index=False)

In [None]:
df_train_deduped_22000['urban_core'] = df_train_deduped_22000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_1),axis=1)
df_train_deduped_23000['urban_core'] = df_train_deduped_23000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_1),axis=1)
df_train_deduped_24000['urban_core'] = df_train_deduped_24000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_1),axis=1)
df_train_deduped_24000[["wildfire_status",'urban_core']].value_counts().sort_index()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train_deduped_22000['urban_core'] = df_train_deduped_22000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_1),axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train_deduped_23000['urban_core'] = df_train_deduped_23000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_1),axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://

Unnamed: 0_level_0,Unnamed: 1_level_0,count
wildfire_status,urban_core,Unnamed: 2_level_1
wildfire,False,930
wildfire,True,70


In [None]:
df_train_deduped_22000['urban_commute'] = df_train_deduped_22000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_2),axis=1)
df_train_deduped_22000[["wildfire_status",'urban_commute']].value_counts().sort_index()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train_deduped_22000['urban_commute'] = df_train_deduped_22000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_2),axis=1)


Unnamed: 0_level_0,Unnamed: 1_level_0,count
wildfire_status,urban_commute,Unnamed: 2_level_1
wildfire,False,754
wildfire,True,246


In [None]:
df_train_deduped_23000['urban_commute'] = df_train_deduped_23000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_2),axis=1)
df_train_deduped_23000[["wildfire_status",'urban_commute']].value_counts().sort_index()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train_deduped_23000['urban_commute'] = df_train_deduped_23000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_2),axis=1)


Unnamed: 0_level_0,Unnamed: 1_level_0,count
wildfire_status,urban_commute,Unnamed: 2_level_1
wildfire,False,593
wildfire,True,407


In [None]:
df_train_deduped_24000['urban_commute'] = df_train_deduped_24000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_2),axis=1)
df_train_deduped_24000[["wildfire_status",'urban_commute']].value_counts().sort_index()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train_deduped_24000['urban_commute'] = df_train_deduped_24000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_2),axis=1)


Unnamed: 0_level_0,Unnamed: 1_level_0,count
wildfire_status,urban_commute,Unnamed: 2_level_1
wildfire,False,745
wildfire,True,255


In [None]:
df_train_deduped_urban8 = pd.concat([df_train_deduped_22000,df_train_deduped_23000,df_train_deduped_24000])
df_train_deduped_urban8.to_csv("df_train_deduped_urban8.csv", index=False)

In [None]:
df_train_deduped_25000['urban_core'] = df_train_deduped_25000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_1),axis=1)
df_train_deduped_26000['urban_core'] = df_train_deduped_26000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_1),axis=1)
df_train_deduped_27000['urban_core'] = df_train_deduped_27000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_1),axis=1)
df_train_deduped_27000[["wildfire_status",'urban_core']].value_counts().sort_index()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train_deduped_25000['urban_core'] = df_train_deduped_25000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_1),axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train_deduped_26000['urban_core'] = df_train_deduped_26000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_1),axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://

Unnamed: 0_level_0,Unnamed: 1_level_0,count
wildfire_status,urban_core,Unnamed: 2_level_1
wildfire,False,988
wildfire,True,12


In [None]:
df_train_deduped_25000['urban_commute'] = df_train_deduped_25000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_2),axis=1)
df_train_deduped_25000[["wildfire_status",'urban_commute']].value_counts().sort_index()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train_deduped_25000['urban_commute'] = df_train_deduped_25000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_2),axis=1)


Unnamed: 0_level_0,Unnamed: 1_level_0,count
wildfire_status,urban_commute,Unnamed: 2_level_1
wildfire,False,786
wildfire,True,214


In [None]:
df_train_deduped_26000['urban_commute'] = df_train_deduped_26000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_2),axis=1)
df_train_deduped_26000[["wildfire_status",'urban_commute']].value_counts().sort_index()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train_deduped_26000['urban_commute'] = df_train_deduped_26000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_2),axis=1)


Unnamed: 0_level_0,Unnamed: 1_level_0,count
wildfire_status,urban_commute,Unnamed: 2_level_1
wildfire,False,719
wildfire,True,281


In [None]:
df_train_deduped_27000['urban_commute'] = df_train_deduped_27000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_2),axis=1)
df_train_deduped_27000[["wildfire_status",'urban_commute']].value_counts().sort_index()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train_deduped_27000['urban_commute'] = df_train_deduped_27000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_2),axis=1)


Unnamed: 0_level_0,Unnamed: 1_level_0,count
wildfire_status,urban_commute,Unnamed: 2_level_1
wildfire,False,778
wildfire,True,222


In [None]:
df_train_deduped_urban9 = pd.concat([df_train_deduped_25000,df_train_deduped_26000,df_train_deduped_27000])
df_train_deduped_urban9.to_csv("df_train_deduped_urban9.csv", index=False)

BLOCK

In [None]:
df_train_deduped_28000['urban_core'] = df_train_deduped_28000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_1),axis=1)
df_train_deduped_29000['urban_core'] = df_train_deduped_29000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_1),axis=1)
df_train_deduped_30000['urban_core'] = df_train_deduped_30000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_1),axis=1)
df_train_deduped_30000[["wildfire_status",'urban_core']].value_counts().sort_index()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train_deduped_28000['urban_core'] = df_train_deduped_28000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_1),axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train_deduped_29000['urban_core'] = df_train_deduped_29000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_1),axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://

Unnamed: 0_level_0,Unnamed: 1_level_0,count
wildfire_status,urban_core,Unnamed: 2_level_1
wildfire,False,1250


In [None]:
df_train_deduped_28000['urban_commute'] = df_train_deduped_28000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_2),axis=1)
df_train_deduped_28000[["wildfire_status",'urban_commute']].value_counts().sort_index()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train_deduped_28000['urban_commute'] = df_train_deduped_28000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_2),axis=1)


Unnamed: 0_level_0,Unnamed: 1_level_0,count
wildfire_status,urban_commute,Unnamed: 2_level_1
wildfire,False,826
wildfire,True,174


In [None]:
df_train_deduped_29000['urban_commute'] = df_train_deduped_29000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_2),axis=1)
df_train_deduped_29000[["wildfire_status",'urban_commute']].value_counts().sort_index()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train_deduped_29000['urban_commute'] = df_train_deduped_29000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_2),axis=1)


Unnamed: 0_level_0,Unnamed: 1_level_0,count
wildfire_status,urban_commute,Unnamed: 2_level_1
wildfire,False,997
wildfire,True,3


In [None]:
df_train_deduped_30000['urban_commute'] = df_train_deduped_30000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_2),axis=1)
df_train_deduped_30000[["wildfire_status",'urban_commute']].value_counts().sort_index()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train_deduped_30000['urban_commute'] = df_train_deduped_30000.apply(lambda x: is_urban((x.latitude_numeric,x.longitude_numeric),all_shapes_2),axis=1)


Unnamed: 0_level_0,Unnamed: 1_level_0,count
wildfire_status,urban_commute,Unnamed: 2_level_1
wildfire,False,1250


In [None]:
df_train_deduped_urban10 = pd.concat([df_train_deduped_28000,df_train_deduped_29000,df_train_deduped_30000])
df_train_deduped_urban10.to_csv("df_train_deduped_urban10.csv", index=False)

In [None]:
df_train_deduped_urban1 = pd.read_csv("df_train_deduped_urban1.csv")
df_train_deduped_urban2 = pd.read_csv("df_train_deduped_urban2.csv")
df_train_deduped_urban3 = pd.read_csv("df_train_deduped_urban3.csv")
df_train_deduped_urban4 = pd.read_csv("df_train_deduped_urban4.csv")
df_train_deduped_urban5 = pd.read_csv("df_train_deduped_urban5.csv")
df_train_deduped_urban6 = pd.read_csv("df_train_deduped_urban6.csv")
df_train_deduped_urban7 = pd.read_csv("df_train_deduped_urban7.csv")
df_train_deduped_urban8 = pd.read_csv("df_train_deduped_urban8.csv")
df_train_deduped_urban9 = pd.read_csv("df_train_deduped_urban9.csv")
df_train_deduped_urban10 = pd.read_csv("df_train_deduped_urban10.csv")

df_train_deduped_urban = pd.concat([df_train_deduped_urban1,df_train_deduped_urban2,df_train_deduped_urban3,
                                    df_train_deduped_urban4,df_train_deduped_urban5,df_train_deduped_urban6,
                                    df_train_deduped_urban7,df_train_deduped_urban8,df_train_deduped_urban9,
                                    df_train_deduped_urban10])
df_train_deduped_urban["urban"] = df_train_deduped_urban.apply(lambda x: True if x['urban_core'] | x['urban_commute'] else False,axis=1)
df_train_deduped_urban

Unnamed: 0,file_name,latitude,longitude,wildfire_status,latitude_numeric,longitude_numeric,urban_core,urban_commute,urban
0,"./train/nowildfire/-79.535187,43.882733.jpg",-79.535187,43.882733,nowildfire,-79.535187,43.882733,True,True,True
1,"./train/nowildfire/-79.533182,43.784243.jpg",-79.533182,43.784243,nowildfire,-79.533182,43.784243,True,True,True
2,"./train/nowildfire/-79.534956,43.647635.jpg",-79.534956,43.647635,nowildfire,-79.534956,43.647635,True,True,True
3,"./train/nowildfire/-79.536092,43.633224.jpg",-79.536092,43.633224,nowildfire,-79.536092,43.633224,True,True,True
4,"./train/nowildfire/-79.531656,43.704536.jpg",-79.531656,43.704536,nowildfire,-79.531656,43.704536,True,True,True
...,...,...,...,...,...,...,...,...,...
3245,"./train/wildfire/-66.73731,50.35672.jpg",-66.737310,50.356720,wildfire,-66.737310,50.356720,False,False,False
3246,"./train/wildfire/-66.75804,48.01979.jpg",-66.758040,48.019790,wildfire,-66.758040,48.019790,False,False,False
3247,"./train/wildfire/-66.7444,50.05576.jpg",-66.744400,50.055760,wildfire,-66.744400,50.055760,False,False,False
3248,"./train/wildfire/-66.72849,48.01978.jpg",-66.728490,48.019780,wildfire,-66.728490,48.019780,False,False,False


In [None]:
df_train_deduped.shape #30250
df_train_deduped_urban.shape #30250
df_train_deduped["latitude"] = df_train_deduped["latitude"].astype("float64")
df_train_deduped["longitude"] = df_train_deduped["longitude"].astype("float64")
df_train_deduped_urban.info()

<class 'pandas.core.frame.DataFrame'>
Index: 30250 entries, 0 to 3249
Data columns (total 9 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   file_name          30250 non-null  object 
 1   latitude           30250 non-null  float64
 2   longitude          30250 non-null  float64
 3   wildfire_status    30250 non-null  object 
 4   latitude_numeric   30250 non-null  float64
 5   longitude_numeric  30250 non-null  float64
 6   urban_core         30250 non-null  bool   
 7   urban_commute      30250 non-null  bool   
 8   urban              30250 non-null  bool   
dtypes: bool(3), float64(4), object(2)
memory usage: 1.7+ MB


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train_deduped["latitude"] = df_train_deduped["latitude"].astype("float64")
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train_deduped["longitude"] = df_train_deduped["longitude"].astype("float64")


In [None]:
# check to ensure that this also contains 30250 rows
df_train_deduped.merge(df_train_deduped_urban, on=["file_name",
                                                   "latitude",
                                                   "longitude",
                                                   "wildfire_status",
                                                   "latitude_numeric","longitude_numeric"])

Unnamed: 0,file_name,latitude,longitude,wildfire_status,latitude_numeric,longitude_numeric,urban_core,urban_commute,urban
0,"./train/nowildfire/-79.535187,43.882733.jpg",-79.535187,43.882733,nowildfire,-79.535187,43.882733,True,True,True
1,"./train/nowildfire/-79.533182,43.784243.jpg",-79.533182,43.784243,nowildfire,-79.533182,43.784243,True,True,True
2,"./train/nowildfire/-79.534956,43.647635.jpg",-79.534956,43.647635,nowildfire,-79.534956,43.647635,True,True,True
3,"./train/nowildfire/-79.536092,43.633224.jpg",-79.536092,43.633224,nowildfire,-79.536092,43.633224,True,True,True
4,"./train/nowildfire/-79.531656,43.704536.jpg",-79.531656,43.704536,nowildfire,-79.531656,43.704536,True,True,True
...,...,...,...,...,...,...,...,...,...
30245,"./train/wildfire/-66.73731,50.35672.jpg",-66.737310,50.356720,wildfire,-66.737310,50.356720,False,False,False
30246,"./train/wildfire/-66.75804,48.01979.jpg",-66.758040,48.019790,wildfire,-66.758040,48.019790,False,False,False
30247,"./train/wildfire/-66.7444,50.05576.jpg",-66.744400,50.055760,wildfire,-66.744400,50.055760,False,False,False
30248,"./train/wildfire/-66.72849,48.01978.jpg",-66.728490,48.019780,wildfire,-66.728490,48.019780,False,False,False


In [None]:
df_train_deduped_urban.to_csv("df_train_deduped_urban.csv", index=False)

# df_test

In [None]:
lists = []

status = ["nowildfire","wildfire"]
for state in status:
  for filename in os.listdir("./test/"+state):
    temp = []
    temp.append('./test/'+state+'/'+filename)
    temp.append(filename[:filename.index(',')])
    temp.append(filename[filename.index(',')+1:filename.index('.jpg')])
    temp.append(state)
    lists.append(temp)

df_test = pd.DataFrame(lists, columns=["file_name","longitude","latitude","wildfire_status"])
df_test

Unnamed: 0,file_name,longitude,latitude,wildfire_status
0,"./test/nowildfire/-75.591351,45.397808.jpg",-75.591351,45.397808,nowildfire
1,"./test/nowildfire/-75.582705,45.455013.jpg",-75.582705,45.455013,nowildfire
2,"./test/nowildfire/-75.576579,45.426425.jpg",-75.576579,45.426425,nowildfire
3,"./test/nowildfire/-75.572866,45.43287.jpg",-75.572866,45.43287,nowildfire
4,"./test/nowildfire/-75.586328,45.462467.jpg",-75.586328,45.462467,nowildfire
...,...,...,...,...
6295,"./test/wildfire/-70.04461,49.43645.jpg",-70.04461,49.43645,wildfire
6296,"./test/wildfire/-70.1365,50.839.jpg",-70.1365,50.839,wildfire
6297,"./test/wildfire/-70.37538,47.52518.jpg",-70.37538,47.52518,wildfire
6298,"./test/wildfire/-70.33803,47.56565.jpg",-70.33803,47.56565,wildfire


In [None]:
# data cleaning
df_test["latitude_numeric"]=pd.to_numeric(df_test["latitude"])
df_test["longitude_numeric"] = np.where(df_test["longitude"].str.endswith("(1)"),
                                        df_test["longitude"].str.replace(" (1)",""),
                                        df_test["longitude"])
df_test["longitude_numeric"] = pd.to_numeric(df_test["longitude_numeric"])

df_test_deduped = df_test.drop_duplicates(subset=["wildfire_status","latitude_numeric","longitude_numeric"], keep="last")

In [None]:
# # remember, latitude and longitude are reversed in dataset.
df_test_deduped['urban_core'] = df_test_deduped.apply(lambda x: is_urban((x.longitude_numeric,x.latitude_numeric),all_shapes_1),axis=1)
df_test_deduped

Unnamed: 0,file_name,longitude,latitude,wildfire_status,latitude_numeric,longitude_numeric,urban_core
0,"./test/nowildfire/-75.591351,45.397808.jpg",-75.591351,45.397808,nowildfire,45.397808,-75.591351,True
1,"./test/nowildfire/-75.582705,45.455013.jpg",-75.582705,45.455013,nowildfire,45.455013,-75.582705,True
2,"./test/nowildfire/-75.576579,45.426425.jpg",-75.576579,45.426425,nowildfire,45.426425,-75.576579,True
3,"./test/nowildfire/-75.572866,45.43287.jpg",-75.572866,45.43287,nowildfire,45.432870,-75.572866,True
4,"./test/nowildfire/-75.586328,45.462467.jpg",-75.586328,45.462467,nowildfire,45.462467,-75.586328,True
...,...,...,...,...,...,...,...
6295,"./test/wildfire/-70.04461,49.43645.jpg",-70.04461,49.43645,wildfire,49.436450,-70.044610,False
6296,"./test/wildfire/-70.1365,50.839.jpg",-70.1365,50.839,wildfire,50.839000,-70.136500,False
6297,"./test/wildfire/-70.37538,47.52518.jpg",-70.37538,47.52518,wildfire,47.525180,-70.375380,False
6298,"./test/wildfire/-70.33803,47.56565.jpg",-70.33803,47.56565,wildfire,47.565650,-70.338030,False


In [None]:
df_test_deduped["urban_core"].value_counts()

Unnamed: 0_level_0,count
urban_core,Unnamed: 1_level_1
False,3688
True,2612


In [None]:
df_test_deduped_1000 = df_test_deduped.iloc[:1000,]
df_test_deduped_2000 = df_test_deduped.iloc[1000:2000,]
df_test_deduped_3000 = df_test_deduped.iloc[2000:3000,]
df_test_deduped_4000 = df_test_deduped.iloc[3000:4000,]
df_test_deduped_5000 = df_test_deduped.iloc[4000:5000,]
df_test_deduped_6000 = df_test_deduped.iloc[5000:,]
df_test_deduped_6000

Unnamed: 0,file_name,longitude,latitude,wildfire_status,latitude_numeric,longitude_numeric,urban_core
5000,"./test/wildfire/-71.0024,49.9876.jpg",-71.0024,49.9876,wildfire,49.98760,-71.00240,False
5001,"./test/wildfire/-70.97158,48.36211.jpg",-70.97158,48.36211,wildfire,48.36211,-70.97158,False
5002,"./test/wildfire/-70.9894,46.9311.jpg",-70.9894,46.9311,wildfire,46.93110,-70.98940,False
5003,"./test/wildfire/-70.98514,48.26619.jpg",-70.98514,48.26619,wildfire,48.26619,-70.98514,False
5004,"./test/wildfire/-71.00278,46.19544.jpg",-71.00278,46.19544,wildfire,46.19544,-71.00278,False
...,...,...,...,...,...,...,...
6295,"./test/wildfire/-70.04461,49.43645.jpg",-70.04461,49.43645,wildfire,49.43645,-70.04461,False
6296,"./test/wildfire/-70.1365,50.839.jpg",-70.1365,50.839,wildfire,50.83900,-70.13650,False
6297,"./test/wildfire/-70.37538,47.52518.jpg",-70.37538,47.52518,wildfire,47.52518,-70.37538,False
6298,"./test/wildfire/-70.33803,47.56565.jpg",-70.33803,47.56565,wildfire,47.56565,-70.33803,False


In [None]:
df_test_deduped_1000['urban_commute'] = df_test_deduped_1000.apply(lambda x: is_urban((x.longitude_numeric,x.latitude_numeric),all_shapes_2),axis=1)
df_test_deduped_1000[["wildfire_status",'urban_commute']].value_counts().sort_index()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_test_deduped_1000['urban_commute'] = df_test_deduped_1000.apply(lambda x: is_urban((x.longitude_numeric,x.latitude_numeric),all_shapes_2),axis=1)


Unnamed: 0_level_0,Unnamed: 1_level_0,count
wildfire_status,urban_commute,Unnamed: 2_level_1
nowildfire,False,16
nowildfire,True,984


In [None]:
df_test_deduped_2000['urban_commute'] = df_test_deduped_2000.apply(lambda x: is_urban((x.longitude_numeric,x.latitude_numeric),all_shapes_2),axis=1)
df_test_deduped_2000[["wildfire_status",'urban_commute']].value_counts().sort_index()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_test_deduped_2000['urban_commute'] = df_test_deduped_2000.apply(lambda x: is_urban((x.longitude_numeric,x.latitude_numeric),all_shapes_2),axis=1)


Unnamed: 0_level_0,Unnamed: 1_level_0,count
wildfire_status,urban_commute,Unnamed: 2_level_1
nowildfire,False,104
nowildfire,True,896


In [None]:
df_test_deduped_3000['urban_commute'] = df_test_deduped_3000.apply(lambda x: is_urban((x.longitude_numeric,x.latitude_numeric),all_shapes_2),axis=1)
df_test_deduped_3000[["wildfire_status",'urban_commute']].value_counts().sort_index()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_test_deduped_3000['urban_commute'] = df_test_deduped_3000.apply(lambda x: is_urban((x.longitude_numeric,x.latitude_numeric),all_shapes_2),axis=1)


Unnamed: 0_level_0,Unnamed: 1_level_0,count
wildfire_status,urban_commute,Unnamed: 2_level_1
nowildfire,True,820
wildfire,False,153
wildfire,True,27


In [None]:
df_test_deduped_4000['urban_commute'] = df_test_deduped_4000.apply(lambda x: is_urban((x.longitude_numeric,x.latitude_numeric),all_shapes_2),axis=1)
df_test_deduped_4000[["wildfire_status",'urban_commute']].value_counts().sort_index()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_test_deduped_4000['urban_commute'] = df_test_deduped_4000.apply(lambda x: is_urban((x.longitude_numeric,x.latitude_numeric),all_shapes_2),axis=1)


Unnamed: 0_level_0,Unnamed: 1_level_0,count
wildfire_status,urban_commute,Unnamed: 2_level_1
wildfire,False,953
wildfire,True,47


In [None]:
df_test_deduped_5000['urban_commute'] = df_test_deduped_5000.apply(lambda x: is_urban((x.longitude_numeric,x.latitude_numeric),all_shapes_2),axis=1)
df_test_deduped_5000[["wildfire_status",'urban_commute']].value_counts().sort_index()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_test_deduped_5000['urban_commute'] = df_test_deduped_5000.apply(lambda x: is_urban((x.longitude_numeric,x.latitude_numeric),all_shapes_2),axis=1)


Unnamed: 0_level_0,Unnamed: 1_level_0,count
wildfire_status,urban_commute,Unnamed: 2_level_1
wildfire,False,767
wildfire,True,233


In [None]:
df_test_deduped_6000['urban_commute'] = df_test_deduped_6000.apply(lambda x: is_urban((x.longitude_numeric,x.latitude_numeric),all_shapes_2),axis=1)
df_test_deduped_6000[["wildfire_status",'urban_commute']].value_counts().sort_index()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_test_deduped_6000['urban_commute'] = df_test_deduped_6000.apply(lambda x: is_urban((x.longitude_numeric,x.latitude_numeric),all_shapes_2),axis=1)


Unnamed: 0_level_0,Unnamed: 1_level_0,count
wildfire_status,urban_commute,Unnamed: 2_level_1
wildfire,False,1090
wildfire,True,210


In [None]:
df_test_deduped_uban = pd.concat([df_test_deduped_1000,df_test_deduped_2000,df_test_deduped_3000,
                                  df_test_deduped_4000,df_test_deduped_5000,df_test_deduped_6000])
df_test_deduped_uban["urban"] = df_test_deduped_uban.apply(lambda x: True if x['urban_core'] | x['urban_commute'] else False,axis=1)
df_test_deduped_uban

Unnamed: 0,file_name,longitude,latitude,wildfire_status,latitude_numeric,longitude_numeric,urban_core,urban_commute,urban
0,"./test/nowildfire/-75.591351,45.397808.jpg",-75.591351,45.397808,nowildfire,45.397808,-75.591351,True,True,True
1,"./test/nowildfire/-75.582705,45.455013.jpg",-75.582705,45.455013,nowildfire,45.455013,-75.582705,True,True,True
2,"./test/nowildfire/-75.576579,45.426425.jpg",-75.576579,45.426425,nowildfire,45.426425,-75.576579,True,True,True
3,"./test/nowildfire/-75.572866,45.43287.jpg",-75.572866,45.43287,nowildfire,45.432870,-75.572866,True,True,True
4,"./test/nowildfire/-75.586328,45.462467.jpg",-75.586328,45.462467,nowildfire,45.462467,-75.586328,True,True,True
...,...,...,...,...,...,...,...,...,...
6295,"./test/wildfire/-70.04461,49.43645.jpg",-70.04461,49.43645,wildfire,49.436450,-70.044610,False,False,False
6296,"./test/wildfire/-70.1365,50.839.jpg",-70.1365,50.839,wildfire,50.839000,-70.136500,False,False,False
6297,"./test/wildfire/-70.37538,47.52518.jpg",-70.37538,47.52518,wildfire,47.525180,-70.375380,False,False,False
6298,"./test/wildfire/-70.33803,47.56565.jpg",-70.33803,47.56565,wildfire,47.565650,-70.338030,False,False,False


In [None]:
df_test_deduped_uban[["wildfire_status","urban","urban_core","urban_commute"]].value_counts().sort_index()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,count
wildfire_status,urban,urban_core,urban_commute,Unnamed: 4_level_1
nowildfire,False,False,False,120
nowildfire,True,False,True,125
nowildfire,True,True,True,2575
wildfire,False,False,False,2963
wildfire,True,False,True,480
wildfire,True,True,True,37


In [None]:
df_test_deduped_uban.to_csv("df_test_deduped_uban.csv", index=False)