**This notebook assigns coordinates to wildfire twitter posts(because twitter block this info for most posts)**

This code is adapted from group member Juhee Sung-Schenck 

In [2]:
# import libraries

import pandas as pd
import numpy as np
import math
from numpy import random

import plotly.express as px
import plotly.offline as pyo
import plotly.graph_objs as go

In [3]:
df1 = pd.read_csv('../data/wildfire_new.csv')
df2 = pd.read_csv('../data/no_wildfire_new.csv')


In [4]:
# read in the data

eq = pd.concat([df1,df2])

In [10]:
df1.date.min()

'2019-09-18 10:21:27+00:00'

In [12]:
df2.date.min()

'2020-01-09 23:59:35+00:00'

In [5]:
eq.reset_index(drop=True, inplace=True)
eq.shape

(20000, 6)

In [6]:
# check for null values - geocode not available for all tweets

eq.isnull().sum()

id               0
text             1
keyword      17267
user_name        0
date             0
location     20000
dtype: int64

In [7]:
# create a function to generate coordinates within given radius
# this code is my adaptation of Haversine formula
# Bob's genius brain has been input here

def get_coordinates(num, center, radius):
        
    lat, long = center
    
    # add randomness to generate latitude and longitude within the given radius
    rho = np.sqrt(np.random.uniform(0, 1, num))
    phi = np.random.uniform(0, 2 * np.pi, num)
    
    dx = rho * radius * np.cos(phi)
    dy = rho * radius * np.sin(phi)

    # create an empty list to store generated coordinates 
    coordinates = []
    
    for i in range(num):
        
        coordinate = {}
        
        # earth's mean radius is 6371 km
        new_lat = lat + ((180 / math.pi) * (dy[i] / 6371) / math.sin(lat * math.pi / 180)) 
        new_long = long + ((180 / math.pi) * (dx[i] / 6371) / math.cos(long * math.pi / 180))
        
        coordinate['lat'] = new_lat
        coordinate['long'] = new_long
        
        # add to list
        coordinates.append(coordinate)

    return coordinates

In [8]:
coordinates = get_coordinates(len(eq['location']), (36.7378, -119.7871), 150) # --> radius of collected tweets from the center

In [9]:
# create columns to store latitude and longitude separately

eq['lat'] = [coordinate['lat'] for coordinate in coordinates]
eq['long'] = [coordinate['long'] for coordinate in coordinates]

In [10]:
eq.head()

Unnamed: 0,id,text,keyword,user_name,date,location,lat,long
0,1305657570774978560,Smoke from California wildfires causes hazy sk...,,FireandAviation,2020-09-14 23:59:57+00:00,,37.280386,-122.128109
1,1304359517552095235,96% Overwhelmingly Positive Reviews! Grab a fr...,,JoinDeepRock,2020-09-11 10:01:57+00:00,,36.052556,-119.40677
2,1305657551703412736,So a 2-3 degree temperature difference is what...,,matthew_paul17,2020-09-14 23:59:52+00:00,,35.839572,-122.243498
3,1305657540227850240,@POTUS is right about the #wildfires Expert in...,#wildfires #global,Josie95450522,2020-09-14 23:59:50+00:00,,34.897778,-119.722077
4,1305657538281533440,Wildfires in California ARE caused by poor lan...,,Arqahn,2020-09-14 23:59:49+00:00,,38.370841,-120.107434


In [12]:
# check the coordinates

fig = px.scatter_mapbox(eq, lat = 'lat', lon = 'long',
                        color_discrete_sequence = ['navy'], zoom = 1, height = 500)
fig.update_layout(mapbox_style = 'open-street-map')
fig.update_layout(margin = {'r': 0, 't': 0, 'l': 0, 'b':0})

fig.write_html('../images/test-fig2.html')
fig.show()

In [13]:
eq.drop(columns=['keyword', 'location']).to_csv('../data/final.csv', index=False)