In [1]:
import pandas as pd 
# Load the raw Vancouver dataset; the path is resolved relative to the
# working directory of the Python process running this notebook.
vancover_data = pd.read_csv("../original_datasets/vancouver.csv")
# Quick sanity check of the load: show the first five rows.
vancover_data.head()

Unnamed: 0,TYPE,YEAR,MONTH,DAY,HOUR,MINUTE,HUNDRED_BLOCK,NEIGHBOURHOOD,X,Y
0,Break and Enter Commercial,2012,12,14,8,52,,Oakridge,491285.0,5453433.0
1,Break and Enter Commercial,2019,3,7,2,6,10XX SITKA SQ,Fairview,490612.964805,5457110.0
2,Break and Enter Commercial,2019,8,27,4,12,10XX ALBERNI ST,West End,491007.779775,5459174.0
3,Break and Enter Commercial,2014,8,8,5,13,10XX ALBERNI ST,West End,491015.943352,5459166.0
4,Break and Enter Commercial,2005,11,14,3,9,10XX ALBERNI ST,West End,491021.385727,5459161.0


In [2]:
# Discard the incident-type and timestamp columns; the remaining columns
# are the ones carried forward from here.
dropped_columns = ['TYPE', 'YEAR', 'MONTH', 'DAY', 'HOUR', 'MINUTE']
new_van_data = vancover_data.drop(columns=dropped_columns)

new_van_data.head(10)

Unnamed: 0,HUNDRED_BLOCK,NEIGHBOURHOOD,X,Y
0,,Oakridge,491285.0,5453433.0
1,10XX SITKA SQ,Fairview,490612.964805,5457110.0
2,10XX ALBERNI ST,West End,491007.779775,5459174.0
3,10XX ALBERNI ST,West End,491015.943352,5459166.0
4,10XX ALBERNI ST,West End,491021.385727,5459161.0
5,10XX ALBERNI ST,West End,491021.385727,5459161.0
6,10XX ALBERNI ST,West End,491021.385727,5459161.0
7,10XX ALBERNI ST,West End,491032.270497,5459150.0
8,10XX ALBERNI ST,West End,491032.270497,5459150.0
9,10XX ALBERNI ST,West End,491032.270497,5459150.0


In [3]:
import math

def utmToLatLng(zone, easting, northing, northernHemisphere=True):
    """Convert a UTM coordinate to a (latitude, longitude) pair in decimal degrees.

    Evaluates the inverse Transverse Mercator series on an ellipsoid with
    semi-major axis 6378137 and eccentricity 0.081819191 (values consistent
    with WGS84), using a central scale factor of 0.9996 and a false easting
    of 500000.

    Args:
        zone: UTM zone number. For ``zone > 0`` the central meridian is
            ``6 * zone - 183`` degrees; any other value falls back to a
            central meridian of 3 degrees.
        easting: UTM easting, in the same length unit as the semi-major axis.
        northing: UTM northing, in the same length unit as the semi-major axis.
        northernHemisphere: Pass False for southern-hemisphere coordinates,
            whose northing is measured against a false northing of 10000000.

    Returns:
        A tuple ``(latitude, longitude)`` in decimal degrees. The latitude is
        negated when ``northernHemisphere`` is False.
    """
    if not northernHemisphere:
        # Strip the southern false northing; the sign is restored further down.
        northing = 10000000 - northing

    a = 6378137          # semi-major axis of the ellipsoid
    e = 0.081819191      # first eccentricity
    e1sq = 0.006739497   # second eccentricity squared
    k0 = 0.9996          # scale factor on the central meridian

    # Rectifying latitude (mu) from the meridional arc length.
    arc = northing / k0
    mu = arc / (a * (1 - math.pow(e, 2) / 4.0 - 3 * math.pow(e, 4) / 64.0 - 5 * math.pow(e, 6) / 256.0))

    # Footpoint latitude (phi1) as a sine series in mu.
    ei = (1 - math.pow((1 - e * e), (1 / 2.0))) / (1 + math.pow((1 - e * e), (1 / 2.0)))

    ca = 3 * ei / 2 - 27 * math.pow(ei, 3) / 32.0
    cb = 21 * math.pow(ei, 2) / 16 - 55 * math.pow(ei, 4) / 32
    cc = 151 * math.pow(ei, 3) / 96
    cd = 1097 * math.pow(ei, 4) / 512
    phi1 = mu + ca * math.sin(2 * mu) + cb * math.sin(4 * mu) + cc * math.sin(6 * mu) + cd * math.sin(8 * mu)

    # Radii of curvature at the footpoint latitude.
    n0 = a / math.pow((1 - math.pow((e * math.sin(phi1)), 2)), (1 / 2.0))
    r0 = a * (1 - e * e) / math.pow((1 - math.pow((e * math.sin(phi1)), 2)), (3 / 2.0))
    fact1 = n0 * math.tan(phi1) / r0

    # Normalised offset from the central meridian. It is measured as
    # (false easting - easting), which is why the longitude offset is
    # subtracted from the central meridian at the end.
    _a1 = 500000 - easting
    dd0 = _a1 / (n0 * k0)
    fact2 = dd0 * dd0 / 2

    t0 = math.pow(math.tan(phi1), 2)
    Q0 = e1sq * math.pow(math.cos(phi1), 2)
    fact3 = (5 + 3 * t0 + 10 * Q0 - 4 * Q0 * Q0 - 9 * e1sq) * math.pow(dd0, 4) / 24

    fact4 = (61 + 90 * t0 + 298 * Q0 + 45 * t0 * t0 - 252 * e1sq - 3 * Q0 * Q0) * math.pow(dd0, 6) / 720

    # Longitude offset: series in odd powers of dd0 (the first term is dd0 itself).
    lof1 = dd0
    lof2 = (1 + 2 * t0 + Q0) * math.pow(dd0, 3) / 6.0
    lof3 = (5 - 2 * Q0 + 28 * t0 - 3 * math.pow(Q0, 2) + 8 * e1sq + 24 * math.pow(t0, 2)) * math.pow(dd0, 5) / 120
    _a2 = (lof1 - lof2 + lof3) / math.cos(phi1)
    _a3 = _a2 * 180 / math.pi

    # The latitude series alternates in sign (D^2 term, minus D^4 term,
    # plus D^6 term); adding the D^4 term skews results away from the
    # central meridian.
    latitude = 180 * (phi1 - fact1 * (fact2 - fact3 + fact4)) / math.pi

    if not northernHemisphere:
        latitude = -latitude

    # Explicit conditional instead of the `x and y or z` idiom, which silently
    # falls through to the fallback whenever the middle operand is falsy.
    central_meridian = 6 * zone - 183.0 if zone > 0 else 3.0
    longitude = central_meridian - _a3

    return (latitude, longitude)

In [18]:
# utmToLatLng returns (latitude, longitude), in that order.
# Column 'X' is passed as the UTM easting and column 'Y' as the UTM northing;
# the same zone number is used for every row.
UTM_ZONE = 10

latitudes = []
longitudes = []
for easting, northing in zip(new_van_data['X'].values, new_van_data['Y'].values):
    latitude, longitude = utmToLatLng(UTM_ZONE, easting, northing)
    latitudes.append(latitude)
    longitudes.append(longitude)
    

In [33]:
# NOTE: this cell expects new_van_data to already carry the columns
# 'location_name', 'longitude', 'neighborhood' and 'latitude'
# (the list printed below shows the state it was run against).
cols = new_van_data.columns.tolist()
print(cols)
# Select the columns by name rather than by position: a positional swap is
# undone when the cell is run a second time, a by-name selection is not.
new_cols = ['location_name', 'longitude', 'latitude', 'neighborhood']
new_van_data = new_van_data[new_cols]
new_van_data.head(5)

['location_name', 'longitude', 'neighborhood', 'latitude']


Unnamed: 0,location_name,longitude,latitude,neighborhood
0,,-123.119712,49.233614,Oakridge
1,10XX SITKA SQ,-123.129029,49.266678,Fairview
2,10XX ALBERNI ST,-123.123649,49.285255,West End
3,10XX ALBERNI ST,-123.123536,49.285181,West End
4,10XX ALBERNI ST,-123.123461,49.285132,West End


In [37]:
# A scalar assignment is broadcast to every row, so no per-row list is needed.
# NOTE: 'Vancover' is a misspelling; a later cell overwrites this column
# with 'Vancouver' after the CSV has been reloaded.
new_van_data['city'] = 'Vancover'
# Sequential key 0, 1, 2, ... in the current row order. len() on the frame
# gives the row count without materialising the whole frame via .values.
new_van_data['location_key'] = range(len(new_van_data))

In [38]:
# Preview the first five rows with the newly added columns.
new_van_data.head()

Unnamed: 0,location_name,longitude,latitude,neighborhood,city,location_key
0,,-123.119712,49.233614,Oakridge,Vancover,0
1,10XX SITKA SQ,-123.129029,49.266678,Fairview,Vancover,1
2,10XX ALBERNI ST,-123.123649,49.285255,West End,Vancover,2
3,10XX ALBERNI ST,-123.123536,49.285181,West End,Vancover,3
4,10XX ALBERNI ST,-123.123461,49.285132,West End,Vancover,4


In [39]:
cols = new_van_data.columns.tolist()
print(cols)
# Move 'location_key' to the front and keep every other column in its current
# order. Selecting by name keeps the cell idempotent; rotating by position
# would shift a different column to the front on each re-run.
new_cols = ['location_key'] + [col for col in cols if col != 'location_key']
new_van_data = new_van_data[new_cols]
new_van_data.head(5)

['location_name', 'longitude', 'latitude', 'neighborhood', 'city', 'location_key']


Unnamed: 0,location_key,location_name,longitude,latitude,neighborhood,city
0,0,,-123.119712,49.233614,Oakridge,Vancover
1,1,10XX SITKA SQ,-123.129029,49.266678,Fairview,Vancover
2,2,10XX ALBERNI ST,-123.123649,49.285255,West End,Vancover
3,3,10XX ALBERNI ST,-123.123536,49.285181,West End,Vancover
4,4,10XX ALBERNI ST,-123.123461,49.285132,West End,Vancover


In [40]:
# Write the prepared frame to CSV with a header row.
# NOTE(review): the destination is an absolute, machine-specific path, while a
# later cell reads "./vancouver_dataset.csv" - confirm both refer to the same file.
output_csv_path = r'/home/zaid/Desktop/datascience_datasets/Vancouver/crimedata_csv_all_years/vancouver_dataset.csv'
# NOTE(review): index=None is presumably meant to omit the index column - verify.
# to_csv returns None when it is given a path, so the assigned name holds no data.
vancouver_address_csv = new_van_data.to_csv(output_csv_path, index = None, header=True)

In [3]:
import pandas as pd

In [6]:
# Reload the exported dataset from the current working directory.
vancouver_data_frame = pd.read_csv("./vancouver_dataset.csv")
# Show the first five rows to confirm the file loaded as expected.
vancouver_data_frame.head()

Unnamed: 0,location_key,location_name,longitude,latitude,neighborhood,city
0,0,,-123.119712,49.233614,Oakridge,Vancover
1,1,10XX SITKA SQ,-123.129029,49.266678,Fairview,Vancover
2,2,10XX ALBERNI ST,-123.123649,49.285255,West End,Vancover
3,3,10XX ALBERNI ST,-123.123536,49.285181,West End,Vancover
4,4,10XX ALBERNI ST,-123.123461,49.285132,West End,Vancover


In [8]:
# Overwrite the city column with the correctly spelled name. A scalar is
# broadcast to every row, so there is no need to build a per-row list or to
# materialise the whole frame with .values just to count rows.
vancouver_data_frame['city'] = 'Vancouver'

In [9]:
# Preview the first five rows after the city column has been rewritten.
vancouver_data_frame.head()

Unnamed: 0,location_key,location_name,longitude,latitude,neighborhood,city
0,0,,-123.119712,49.233614,Oakridge,Vancouver
1,1,10XX SITKA SQ,-123.129029,49.266678,Fairview,Vancouver
2,2,10XX ALBERNI ST,-123.123649,49.285255,West End,Vancouver
3,3,10XX ALBERNI ST,-123.123536,49.285181,West End,Vancouver
4,4,10XX ALBERNI ST,-123.123461,49.285132,West End,Vancouver


In [23]:
# Mean longitude/latitude of all rows in each neighbourhood.
group_by_neighbourhood = vancouver_data_frame.groupby('neighborhood')
# Several columns must be selected with a list (double brackets); the
# tuple-style form gb['a', 'b'] is deprecated and rejected by newer pandas.
neighbourhood_data = group_by_neighbourhood[['longitude', 'latitude']].mean()


  


In [24]:
# Preview the per-neighbourhood mean coordinates (first five groups).
neighbourhood_data.head()

Unnamed: 0_level_0,longitude,latitude
neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1
Arbutus Ridge,-123.160465,49.245008
Central Business District,-123.115697,49.28113
Dunbar-Southlands,-123.184537,49.244789
Fairview,-123.129618,49.264002
Grandview-Woodland,-123.067398,49.275228
