In [None]:
# This notebook analyses a real dataset released by the City of Chicago on the Chicago Data Portal. The dataset contains a selection of six socioeconomic indicators of public health significance and a “hardship index” for each Chicago community area, for the years 2008–2012.

In [None]:
# First, install the dependencies and create the database.

In [None]:
!pip install ipython-sql
%load_ext sql

In [None]:
import csv, sqlite3

con = sqlite3.connect("socioeconomic.db")
cur = con.cursor()
!pip install pandas 

In [None]:
# Point the %sql magic at the same SQLite database file opened with sqlite3 above.
%sql sqlite:///socioeconomic.db

# Store the dataset in a table using a pandas DataFrame

In [None]:
import pandas

# Download the dataset as CSV from the City of Chicago data portal.
df = pandas.read_csv('https://data.cityofchicago.org/resource/jcxq-k9xf.csv')

# Write the frame to SQLite through the open connection: any existing copy of
# the table is replaced, the DataFrame index is not stored, and "multi"
# passes multiple rows in a single INSERT statement.
df.to_sql(
    name="chicago_socioeconomic_data",
    con=con,
    if_exists='replace',
    index=False,
    method="multi",
)


In [None]:
# Install the 'ipython-sql' and 'prettytable' libraries using pip
!pip install ipython-sql prettytable

# Import the 'prettytable' library, which is used to display data in a formatted table
import prettytable

# Set the default display format for prettytable to 'DEFAULT' (i.e., a simple table format)
prettytable.DEFAULT = 'DEFAULT'

# to verify that out tables are accurate 
%sql SELECT * FROM chicago_socioeconomic_data;

# Some basic analysis of the dataset

In [None]:
# Number of communities with a hardship index greater than 50
%sql SELECT COUNT (*) FROM chicago_socioeconomic_data WHERE (hardship_index > 50.0);

In [None]:
# Maximum value of the hardship index in the table
%sql SELECT MAX(hardship_index) FROM chicago_socioeconomic_data;

In [None]:
# Community area(s) with the highest hardship index: the subquery computes the
# maximum, and every row whose hardship_index equals it is returned (ties included).
%sql SELECT community_area_name FROM chicago_socioeconomic_data WHERE hardship_index = (SELECT MAX(hardship_index) FROM chicago_socioeconomic_data);


In [None]:
# communities with percapita income greater than $60,000
%%sql SELECT community_area_name, per_capita_income_ 
FROM chicago_socioeconomic_data 
WHERE per_capita_income_ > 60000;

In [None]:
!pip install matplotlib
!pip install seaborn


import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

# Run the SQL query and store the result in a variable
income_vs_hardship = %sql SELECT per_capita_income_, hardship_index FROM chicago_socioeconomic_data;

# Convert the result to a Pandas DataFrame
df = income_vs_hardship.DataFrame()

# Create a jointplot to visualize the relationship
plot = sns.jointplot(x='per_capita_income_', y='hardship_index', data=df, kind='scatter')

# Show the plot
plt.show()