In [9]:
from configparser import ConfigParser
import psycopg2
import numpy as np

In [10]:
def config(filename='psql_sample.ini', section='postgresql'):
    parser = ConfigParser()
    # read config file
    parser.read(filename)
    # get section, default to postgresql
    db = {}
    if parser.has_section(section):
        params = parser.items(section)
        for param in params:
            db[param[0]] = param[1]
    else:
        raise Exception('Section {0} not found in the {1} file'.format(section, filename))
    
    return db

In [11]:
cfg = config()

In [12]:
#Establish the connection and create a cursor to the database
try:
    print("Here's an attempt to connect to the database")
    conn = psycopg2.connect(**cfg)
    cursor = conn.cursor()
    print("Look's like it was a success")
except (Exception, psycopg2.DatabaseError) as error:
    print(error)

Here's an attempt to connect to the database
Look's like it was a success


In [13]:
dataframe_query = '''
select 
	neighborhood,
	is_traffic,
	is_fatal,
	is_nighttime,
	crime_type,
	is_american_holiday,
	is_canadian_holiday,
	month,
	city
from t_fact_table
inner join t_location_dim on t_location_dim.location_key = t_fact_table.location_key
inner join t_crime_dim on t_crime_dim.crime_key = t_fact_table.crime_key
inner join t_date_dim on t_date_dim.date_key = t_fact_table.date_key
'''
try:
    #Run a simple query
    cursor.execute(dataframe_query)
    #Loop over the result set and print first 10 rows
    result_list = cursor.fetchall()
except (Exception, psycopg2.DatabaseError) as error:
    print(error)

In [14]:
#close connection
cursor.close()
conn.close()

In [20]:
print(result_list[0:10])

[('montbello', False, False, True, 'Theft', False, False, '10', 'Denver'), ('northeast-park-hill', True, False, True, 'Traffic Accident', False, False, '1', 'Denver'), ('montbello', True, False, True, 'Traffic Accident', False, False, '1', 'Denver'), ('montbello', False, False, True, 'Offence Against a Person', False, False, '1', 'Denver'), ('lincoln-park', False, False, True, 'Theft of Vehicle', False, False, '1', 'Denver'), ('north-capitol-hill', False, False, True, 'Other Crimes', False, False, '1', 'Denver'), ('lincoln-park', False, False, True, 'Offence Against a Person', False, False, '10', 'Denver'), ('montbello', False, False, True, 'Offence Against a Person', False, False, '4', 'Denver'), ('globeville', False, False, True, 'Other Crimes', False, False, '4', 'Denver'), ('civic-center', False, False, True, 'Other Crimes', False, False, '4', 'Denver')]


In [21]:
import pandas as pd
inter_data_set = pd.DataFrame(result_list, columns= ['Neighborhood', 'Is_Traffic', 'Is_fatal', 'Is_nighttime', 'Crime_type', 'Is_american_holiday', 'Is_canadian_holiday', 'Month', 'City'])
inter_data_set.head()

Unnamed: 0,Neighborhood,Is_Traffic,Is_fatal,Is_nighttime,Crime_type,Is_american_holiday,Is_canadian_holiday,Month,City
0,montbello,False,False,True,Theft,False,False,10,Denver
1,northeast-park-hill,True,False,True,Traffic Accident,False,False,1,Denver
2,montbello,True,False,True,Traffic Accident,False,False,1,Denver
3,montbello,False,False,True,Offence Against a Person,False,False,1,Denver
4,lincoln-park,False,False,True,Theft of Vehicle,False,False,1,Denver


In [22]:
def get_season(month):
    if month == 12 or month == 1 or month == 2:
        return 'Winter'
    if month == 3 or month == 4 or month == 5 :
        return 'Spring'
    if month == 6 or month == 7 or month == 8: 
        return 'Summer'
    else:
        return 'Fall'

In [23]:
inter_data_set['Season'] = inter_data_set.apply(lambda row: get_season(int(row['Month'])), axis = 1)
inter_data_set.drop(['Month'], axis = 1)
inter_data_set.head()

Unnamed: 0,Neighborhood,Is_Traffic,Is_fatal,Is_nighttime,Crime_type,Is_american_holiday,Is_canadian_holiday,Month,City,Season
0,montbello,False,False,True,Theft,False,False,10,Denver,Fall
1,northeast-park-hill,True,False,True,Traffic Accident,False,False,1,Denver,Winter
2,montbello,True,False,True,Traffic Accident,False,False,1,Denver,Winter
3,montbello,False,False,True,Offence Against a Person,False,False,1,Denver,Winter
4,lincoln-park,False,False,True,Theft of Vehicle,False,False,1,Denver,Winter


In [24]:
inter_data_set.to_csv('./classification_dataset.csv', index=False)