In [18]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Resolution shared by on-screen rendering and saved figures.
HIGH_RES_DPI = {'figure.dpi': 600, 'savefig.dpi': 600}
plt.rcParams.update(HIGH_RES_DPI)

# Seaborn theme: dark grid, viridis palette, 11.7 x 8.27 inch canvas.
# The DPI entries are repeated in `rc` so the theme call leaves them in place.
sns.set_theme(
    style='darkgrid',
    palette='viridis',
    rc={**HIGH_RES_DPI, 'figure.figsize': (11.7, 8.27)},
)

In [19]:
# Read the source table; the path is resolved relative to the working directory.
FIRE_LOCS_CSV = '../assets/fire_locs.csv'
df = pd.read_csv(FIRE_LOCS_CSV)
df.head()

Unnamed: 0,_uid_,id,longitude,latitude,initialdate,days
0,16.0,966855.0,-2107749.0,1663840.0,2002-01-08,0
1,19.0,966859.0,-1599552.0,1247091.0,2002-01-08,7
2,19.0,966859.0,-1599552.0,1247091.0,2002-01-09,7
3,17.0,966856.0,-2106833.0,1661532.0,2002-01-10,0
4,19.0,966859.0,-1599552.0,1247091.0,2002-01-10,7


In [20]:
# Parse the date strings into pandas datetimes so later cells can do date arithmetic.
parsed_initial_dates = pd.to_datetime(df['initialdate'])
df['initialdate'] = parsed_initial_dates

In [21]:
# Keep an independent snapshot of the parsed table in `odf`; later cells
# modify `df` and can start over from this copy.

odf = df.copy(deep=True)

In [22]:
from random import randint  # kept so any later cell that still relies on `randint` keeps working

# Negative samples at the same locations: move every initial date back by a
# whole number of days drawn uniformly from 30..90 (both ends inclusive).
#
# * The day counts are drawn in one vectorised call instead of building a
#   pd.DateOffset inside a Python lambda for each row.
# * The generator is seeded so Restart & Run All writes the same CSV every time.
# * The dates are read from the untouched snapshot `odf` (identical to `df` at
#   this point), so re-running this cell does not shift the dates twice.
same_location_rng = np.random.default_rng(22)
days_back = same_location_rng.integers(30, 90, size=len(odf), endpoint=True)
df['initialdate'] = odf['initialdate'] - pd.to_timedelta(days_back, unit='D')

df.head()

Unnamed: 0,_uid_,id,longitude,latitude,initialdate,days
0,16.0,966855.0,-2107749.0,1663840.0,2001-10-11,0
1,19.0,966859.0,-1599552.0,1247091.0,2001-10-26,7
2,19.0,966859.0,-1599552.0,1247091.0,2001-11-23,7
3,17.0,966856.0,-2106833.0,1661532.0,2001-10-21,0
4,19.0,966859.0,-1599552.0,1247091.0,2001-11-09,7


In [23]:
# Write the current state of `df` to disk, leaving out the DataFrame index.
df.to_csv(path_or_buf='../assets/negative_class_same_locations.csv', index=False)

In [24]:
# Start over from the untouched snapshot. DataFrame.copy() is deep by default
# and is the public API for what the __deepcopy__ dunder does, so the dunder
# should not be called directly.
df = odf.copy()
df.head()

Unnamed: 0,_uid_,id,longitude,latitude,initialdate,days
0,16.0,966855.0,-2107749.0,1663840.0,2002-01-08,0
1,19.0,966859.0,-1599552.0,1247091.0,2002-01-08,7
2,19.0,966859.0,-1599552.0,1247091.0,2002-01-09,7
3,17.0,966856.0,-2106833.0,1661532.0,2002-01-10,0
4,19.0,966859.0,-1599552.0,1247091.0,2002-01-10,7


In [25]:
# Peek at the x coordinate column (magnitudes around 1e6 suggest projected
# coordinates rather than degrees -- TODO confirm the CRS).
df['longitude']

0        -2.107749e+06
1        -1.599552e+06
2        -1.599552e+06
3        -2.106833e+06
4        -1.599552e+06
              ...     
167056   -2.182605e+06
167057   -2.087646e+06
167058   -2.087646e+06
167059   -2.087646e+06
167060   -2.087646e+06
Name: longitude, Length: 167061, dtype: float64

In [26]:
import numpy as np


def add_random_offset_projected(x, y, max_radius_meters=100000, rng=None):
    """Shift projected coordinates by a random offset inside a disk.

    A direction is drawn uniformly from [0, 2*pi) and a distance as
    ``max_radius_meters * sqrt(u)`` with ``u`` uniform on [0, 1). Taking the
    square root makes the offset uniform over the *area* of the disk instead
    of clustering near the centre.

    Parameters
    ----------
    x, y : float or array-like
        Coordinates in a projected system whose unit matches
        ``max_radius_meters``. Scalars give one offset; array-like input
        (NumPy arrays, pandas Series, ...) gets an independent offset per
        element.
    max_radius_meters : float, default 100000
        Maximum distance between the input point and the returned point.
    rng : numpy.random.Generator or numpy.random.RandomState, optional
        Source of randomness. When omitted, NumPy's global random state is
        used, so ``np.random.seed`` keeps controlling the result.

    Returns
    -------
    tuple
        ``(x + dx, y + dy)``, with the same shape as the broadcast of ``x``
        and ``y``.
    """
    if rng is None:
        # The np.random module exposes the same `uniform` signature as a Generator.
        rng = np.random

    # `size=None` keeps the scalar path returning plain scalars; otherwise draw
    # one independent sample per element so array input is not shifted rigidly.
    sample_shape = np.broadcast(x, y).shape or None

    # Draw order (angle first, then radius) is kept so seeded scalar calls
    # produce the same values as before.
    theta = rng.uniform(0.0, 2.0 * np.pi, size=sample_shape)
    r = max_radius_meters * np.sqrt(rng.uniform(0.0, 1.0, size=sample_shape))

    dx = r * np.cos(theta)
    dy = r * np.sin(theta)

    return x + dx, y + dy


# Move every point to a random position within the helper's default radius of
# its original location.
#
# * Seeding NumPy's global state (which the helper draws from by default)
#   makes the jitter reproducible under Restart & Run All.
# * A plain comprehension over the coordinate pairs replaces the row-wise
#   DataFrame.apply, which allocated a pd.Series for every row.
# * Coordinates are read from the untouched snapshot `odf` (identical to `df`
#   at this point), so re-running this cell does not compound the offsets.
np.random.seed(26)
jittered_xy = np.array(
    [add_random_offset_projected(x, y)
     for x, y in zip(odf['longitude'], odf['latitude'])],
    dtype=float,
).reshape(-1, 2)  # reshape keeps a well-formed (0, 2) array for an empty table
df[['longitude', 'latitude']] = jittered_xy
df.head()

Unnamed: 0,_uid_,id,longitude,latitude,initialdate,days
0,16.0,966855.0,-2093528.0,1630820.0,2002-01-08,0
1,19.0,966859.0,-1630234.0,1215416.0,2002-01-08,7
2,19.0,966859.0,-1590190.0,1254799.0,2002-01-09,7
3,17.0,966856.0,-2101847.0,1721695.0,2002-01-10,0
4,19.0,966859.0,-1687368.0,1268341.0,2002-01-10,7


In [27]:
# Jitter every initial date by a whole number of days drawn uniformly from
# -60..60 (both ends inclusive).
#
# * The day counts are drawn in one vectorised call instead of building a
#   pd.DateOffset inside a Python lambda for each row.
# * The generator is seeded so Restart & Run All writes the same CSV every time.
# * The dates are read from the untouched snapshot `odf` (`df` still carries
#   those same dates here), so re-running this cell does not stack jitters.
random_date_rng = np.random.default_rng(27)
day_jitter = random_date_rng.integers(-60, 60, size=len(odf), endpoint=True)
df['initialdate'] = odf['initialdate'] + pd.to_timedelta(day_jitter, unit='D')

df.head()

Unnamed: 0,_uid_,id,longitude,latitude,initialdate,days
0,16.0,966855.0,-2093528.0,1630820.0,2001-12-29,0
1,19.0,966859.0,-1630234.0,1215416.0,2002-02-12,7
2,19.0,966859.0,-1590190.0,1254799.0,2002-01-16,7
3,17.0,966856.0,-2101847.0,1721695.0,2002-01-13,0
4,19.0,966859.0,-1687368.0,1268341.0,2001-12-24,7


In [28]:
# Write the current state of `df` to disk, leaving out the DataFrame index.
df.to_csv(path_or_buf='../assets/negative_class_random_complete.csv', index=False)