# API Call and Cleaning

In [None]:
import pandas as pd
from sodapy import Socrata

# Unauthenticated client use of data.austintexas.gov API (no app token passed)
client = Socrata("data.austintexas.gov", None)

# Get results, returned as JSON from API / converted to Python list of dictionaries by sodapy.
# `where` must be a single SoQL expression string. The previous value,
# [{"occ_date">="2014-01-01T00:00:00.000"}], compared two Python string literals
# and therefore sent [{True}] instead of a date filter.
results = client.get(
    "fdj4-gpfu",
    where="occ_date >= '2014-01-01T00:00:00.000'",
    limit=500000,
)

# Convert to pandas DataFrame
crime_df = pd.DataFrame.from_records(results)

In [None]:
# Preview the first rows of the raw API result.
crime_df.head()

In [None]:
# (row count, column count) of the raw frame.
crime_df.shape

In [None]:
# List the column labels returned by the API.
crime_df.columns

In [None]:
# Count of missing values in each column of the raw frame.
crime_df.isna().sum()

In [None]:
# Columns excluded from the cleaned frame; crime_df itself is left untouched.
columns_to_drop = [
    'family_violence',
    'occ_date_time',
    'occ_time',
    'rep_date_time',
    'rep_date',
    'rep_time',
    'clearance_status',
    'clearance_date',
    'pra',
    'ucr_category',
    'category_description',
    'location',
]
crime_clean_df = crime_df.drop(columns=columns_to_drop)

In [None]:
# Preview the frame after the column drop.
crime_clean_df.head()

In [None]:
# Discard every row that has a missing value in any remaining column.
crime_clean_df = crime_clean_df.dropna(axis=0, how='any')

In [None]:
# (row count, column count) after dropping rows with missing values.
crime_clean_df.shape

In [None]:
# Inspect column dtypes before the explicit casts below.
crime_clean_df.dtypes

In [None]:
# Target dtype for each column, applied one column at a time in this order.
column_dtypes = {
    'incident_report_number': 'int64',
    'crime_type': 'str',
    'ucr_code': 'int',
    'location_type': 'str',
    'address': 'str',
    'zip_code': 'int',
    'council_district': 'int',
    'sector': 'str',
    'district': 'str',
    'census_tract': 'str',
    'x_coordinate': 'float64',
    'y_coordinate': 'float64',
    'latitude': 'float64',
    'longitude': 'float64',
}
for column_name, target_dtype in column_dtypes.items():
    crime_clean_df[column_name] = crime_clean_df[column_name].astype(target_dtype)

In [None]:
# Parse the occurrence date column into pandas datetimes.
raw_occ_dates = crime_clean_df['occ_date']
crime_clean_df['occ_date'] = pd.to_datetime(raw_occ_dates)

In [None]:
# Re-check column dtypes after the casts and the datetime conversion.
crime_clean_df.dtypes

In [None]:
# Number of rows per zip code.
crime_clean_df.groupby('zip_code')['zip_code'].count()

In [None]:
# The original cell grouped by '' (an empty column label), which is not a
# column and fails with KeyError.
# NOTE(review): census_tract is assumed to be the intended key because the
# following cells explore that column — confirm or delete this cell.
crime_clean_df.groupby(['census_tract'])['census_tract'].count()

In [None]:
# Distinct census_tract values present in the cleaned frame.
crime_clean_df['census_tract'].unique()

In [None]:
# How many distinct census_tract values there are.
crime_clean_df['census_tract'].nunique()

In [None]:
# Rows per census tract, as a one-column DataFrame indexed by the tract value.
census_tract_counts = crime_clean_df['census_tract'].value_counts()
census_tract_df = census_tract_counts.to_frame()
census_tract_df.head()

In [None]:
# Label the index and the single count column explicitly.
# Renaming from 'census_tract' only works on pandas < 2.0; from 2.0 on,
# value_counts() names its result 'count', so the old rename silently did
# nothing. Assigning the label directly works on both.
census_tract_df.index.name = 'census_tract'
census_tract_df.columns = ['incident_report_count']
census_tract_df.head()

In [None]:
# Write the cleaned frame to the shared Resources folder.
# NOTE(review): the row index is written too (pandas default) — confirm that is wanted.
clean_csv_path = '../Resources/Austin_Crime_Reports_Clean.csv'
crime_clean_df.to_csv(clean_csv_path)

# Database Connection

In [None]:
import sqlite3

In [None]:
# Open the SQLite database file (path is relative to the working directory).
database_path = 'gentrification_db.sqlite'
conn = sqlite3.connect(database_path)

In [None]:
# Store the cleaned reports as table crime_reports_df, replacing any existing
# table of that name; the DataFrame index is not written.
crime_clean_df.to_sql('crime_reports_df', conn, if_exists='replace', index=False)

In [None]:
# Read the crime reports table back from SQLite and preview it.
table_name = "crime_reports_df"
sql_statement = f"SELECT * FROM {table_name};"
print(sql_statement)
crime_reports_df = pd.read_sql(sql=sql_statement, con=conn)
crime_reports_df.head()

In [None]:
# Store the per-tract counts as table crime_census_df, replacing any existing
# table of that name; the index is written as a column.
census_tract_df.to_sql('crime_census_df', conn, if_exists='replace', index=True)

In [None]:
# Read the per-tract counts table back from SQLite and preview it.
table_name = "crime_census_df"
sql_statement = f"SELECT * FROM {table_name};"
print(sql_statement)
crime_census_df = pd.read_sql(sql=sql_statement, con=conn)
crime_census_df.head()