In [1]:
import sqlite3
import pandas as pd
import matplotlib as plt

In [2]:
conn = sqlite3.connect('chicago_311.db')
c = conn.cursor()

In [17]:

# loading the data into a Pandas DataFrame
chicago_311_2018 = pd.read_csv('./chicago_311_2018.csv')
# writing the data to a sqlite table
chicago_311_2018.to_sql('chicago_311_2018', conn, if_exists='append', index = False)

461170

### 1. Retrieve the total number of complaints for each category.

In [18]:
c = conn.cursor()

# Execute the SQL query
c.execute('''
            SELECT category as Complaint_category
            ,COUNT(category) as Counts
            ,Rank() over (ORDER BY COUNT(category) DESC) AS Rank
            FROM chicago_311_2018
            GROUP BY category
            ORDER BY 2 DESC
            LIMIT 10
          ''')

# Fetch all the results
results = c.fetchall()
# Convert the results to a DataFrame
df = pd.DataFrame(results, columns=[desc[0] for desc in c.description])

# Display the DataFrame as a table
df

Unnamed: 0,Complaint_category,Counts,Rank
0,Graffiti Removal Request,50427,1
1,Weed Removal Request,46186,2
2,Street Light Out Complaint,42924,3
3,Garbage Cart Maintenance,24959,4
4,Rodent Baiting/Rat Complaint,24393,5
5,Tree Trim Request,22622,6
6,Pothole in Street Complaint,20522,7
7,Sign Repair Request - All Other Signs,17870,8
8,311 INFORMATION ONLY CALL,17812,9
9,Alley Light Out Complaint,14000,10


### 2. Calculate the average resolution time (in days) for closed complaints..

In [19]:
c = conn.cursor()

# Execute the SQL query
c.execute('''
            with cte as 
                (SELECT request_id
                ,responsibleagency
                ,closed_date,created_date
                ,CAST(julianday(substr(closed_date, 1, 10)) - julianday(substr(created_date, 1, 10)) AS INTEGER) AS days_taken
                FROM chicago_311_2018
                WHERE closed_date is NOT NULL and created_date IS NOT NULL
                )
            SELECT responsibleagency as Responsible_agency
            ,MAX(days_taken) as Days_taken_max
            From cte
            GROUP BY responsibleagency
            ORDER BY MAX(days_taken) DESC;
          ''')

# Fetch all the results
results = c.fetchall()
# Convert the results to a DataFrame
df = pd.DataFrame(results, columns=[desc[0] for desc in c.description])

# Display the DataFrame as a table
df

Unnamed: 0,Responsible_agency,Days_taken_max
0,Streets and Sanitation,657
1,DWM - Department of Water Management,655
2,CDOT - Department of Transportation,645
3,BACP - Business Affairs and Consumer Protection,620
4,DOB - Buildings,539
5,Animal Care and Control,229
6,City Clerk's Office,147
7,Health,96
8,Department of Planning and Development,23
9,Extreme Weather Notification,0


### 3. Show a table with responsible agency, maximum days taken, and case ID that has taken the highest time to solve

In [20]:
c = conn.cursor()

# Execute the SQL query
c.execute('''         
            with cte as 
                (SELECT 
                request_id
                ,responsibleagency
                ,closed_date,created_date
                ,CAST(julianday(substr(closed_date, 1, 10)) - julianday(substr(created_date, 1, 10)) AS INTEGER) AS days_taken
                FROM chicago_311_2018
                WHERE closed_date is NOT NULL and created_date IS NOT NULL
                )
            SELECT 
            cte.responsibleagency AS Agency
            ,MAX(days_taken) AS Days_taken_max 
            ,cte.request_id AS Request
            From cte
            Join (
                SELECT 
                DISTINCT(cte.responsibleagency)
                ,MAX(days_taken) as max_taken FROM cte GROUP BY responsibleagency)as cte_2 
            ON cte.responsibleagency = cte_2.responsibleagency 
            AND cte.days_taken = cte_2.max_taken
            GROUP BY cte.responsibleagency
            ORDER BY 2 DESC
            ;
            
          ''')

# Fetch all the results
results = c.fetchall()
# Convert the results to a DataFrame
df = pd.DataFrame(results, columns=[desc[0] for desc in c.description])

# Display the DataFrame as a table
df

Unnamed: 0,Agency,Days_taken_max,Request
0,Streets and Sanitation,657,SR18-00084296
1,DWM - Department of Water Management,655,SR18-00085030
2,CDOT - Department of Transportation,645,SR18-00083485
3,BACP - Business Affairs and Consumer Protection,620,SR18-00089604
4,DOB - Buildings,539,SR18-00135108
5,Animal Care and Control,229,SR18-00140042
6,City Clerk's Office,147,SR18-00192815
7,Health,96,SR19-00626132
8,Department of Planning and Development,23,SR18-00209454
9,Extreme Weather Notification,0,SR18-00222661


### 4. List the boroughs with the highest number of complaints.

In [21]:
c = conn.cursor()

# Execute the SQL query
c.execute('''         

            WITH cte AS
                (SELECT 
                    precinct AS Precinct
                    ,count(request_id) AS Request_counts
                
                FROM chicago_311_2018
                Group BY precinct
                ORDER BY 2 DESC
                LIMIT 10)
            SELECT *
            FROM cte;
            
          ''')

# Fetch all the results
results = c.fetchall()
# Convert the results to a DataFrame
df = pd.DataFrame(results, columns=[desc[0] for desc in c.description])

# Display the DataFrame as a table
df

Unnamed: 0,Precinct,Request_counts
0,46.0,21328
1,10.0,12649
2,11.0,12156
3,15.0,12155
4,5.0,12062
5,17.0,11679
6,19.0,11658
7,9.0,11531
8,2.0,11211
9,13.0,11194


### 5. Create a table showing agency name, maximum days taken to complete a request and the category name of that request

In [22]:
c = conn.cursor()

# Execute the SQL query
c.execute('''         
            with cte as 
                (SELECT 
                category
                ,responsibleagency
                ,closed_date,created_date
                ,CAST(julianday(substr(closed_date, 1, 10)) - julianday(substr(created_date, 1, 10)) AS INTEGER) AS days_taken
                FROM chicago_311_2018
                WHERE closed_date is NOT NULL and created_date IS NOT NULL
                )
            SELECT 
            cte.responsibleagency AS Agency
            ,MAX(days_taken) AS Days_taken_max
            ,cte.category AS Category
            From cte
            Join (
                SELECT 
                DISTINCT cte.responsibleagency AS Agency
                ,MAX(days_taken) as Days_taken_max
            FROM cte 
            GROUP BY responsibleagency)as cte_2 
            ON cte.responsibleagency = cte_2.Agency 
            AND cte.days_taken = cte_2.Days_taken_max
            GROUP BY cte_2.Agency
            ORDER BY 2 DESC
            ;
            
          ''')

# Fetch all the results
results = c.fetchall()
# Convert the results to a DataFrame
df = pd.DataFrame(results, columns=[desc[0] for desc in c.description])

# Display the DataFrame as a table
df

Unnamed: 0,Agency,Days_taken_max,Category
0,Streets and Sanitation,657,Tree Planting Request
1,DWM - Department of Water Management,655,Sewer Cleaning Inspection Request
2,CDOT - Department of Transportation,645,Alley Light Out Complaint
3,BACP - Business Affairs and Consumer Protection,620,Cab Feedback
4,DOB - Buildings,539,Vacant/Abandoned Building Complaint
5,Animal Care and Control,229,Stray Animal Complaint
6,City Clerk's Office,147,City Vehicle Sticker Violation
7,Health,96,Restaurant Complaint
8,Department of Planning and Development,23,Bungalow Rehab/Purchase Information Request
9,Extreme Weather Notification,0,Extreme Weather Notification


### 6. List the boroughs with the lowest number of complaints.

In [23]:
c = conn.cursor()

# Execute the SQL query
c.execute('''         

            SELECT 
                precinct,
                count(request_id)
            FROM chicago_311_2018
            Group BY precinct
            ORDER BY 2 ASC
            LIMIT 10;
            
          ''')

# Fetch all the results
results = c.fetchall()
# Convert the results to a DataFrame
df = pd.DataFrame(results, columns=[desc[0] for desc in c.description])

# Display the DataFrame as a table
df

Unnamed: 0,precinct,count(request_id)
0,57.0,151
1,56.0,364
2,55.0,380
3,54.0,422
4,52.0,790
5,51.0,919
6,53.0,987
7,50.0,1237
8,49.0,2166
9,48.0,2372


### 7. Determine the agencies with the highest number of complaints and their corresponding complaint types.

In [24]:
c = conn.cursor()

# Execute the SQL query
c.execute('''
            SELECT 
                responsibleagency as agency,
                COUNT(request_id) as total_comp_for_agency
                ,MAX(category) as max_comp_categor
            FROM chicago_311_2018
            GROUP BY responsibleagency
            ORDER BY 2 DESC
            ;

''')

# Fetch all the results
results = c.fetchall()

# Convert the results to a DataFrame
df = pd.DataFrame(results, columns=[desc[0] for desc in c.description])

# Display the DataFrame as a table
df

Unnamed: 0,agency,total_comp_for_agency,max_comp_categor
0,Streets and Sanitation,241150,Yard Waste Pick-Up Request
1,CDOT - Department of Transportation,129527,Viaduct Light Out Complaint
2,DWM - Department of Water Management,23645,Water in Basement Complaint
3,DOB - Buildings,18572,Vacant/Abandoned Building Complaint
4,Animal Care and Control,18065,Vicious Animal Complaint
5,311 City Services,17812,311 INFORMATION ONLY CALL
6,BACP - Business Affairs and Consumer Protection,6190,Tobacco - Sale to Minors Complaint
7,Aviation,3498,Aircraft Noise Complaint
8,Health,1821,Smokeless Tobacco at Sports Event Complaint
9,Department of Planning and Development,715,Home Buyer Program Info Request


### 8. provide a table displaying the Ward, the total number of complaints, the category with the highest number of complaints, and the category with the lowest number of complaints?

In [25]:
c = conn.cursor()

# Execute the SQL query
c.execute('''
            SELECT 
                ward_id as Ward
                ,COUNT(request_id) as Total_complaints
                ,MAX(category) as max_comp_categ
                ,MIN(category) as least_comp_categ
                FROM chicago_311_2018
                GROUP BY ward_id
                ORDER BY 2 DESC
            ;

''')

# Fetch all the results
results = c.fetchall()

# Convert the results to a DataFrame
df = pd.DataFrame(results, columns=[desc[0] for desc in c.description])

# Display the DataFrame as a table
df

Unnamed: 0,Ward,Total_complaints,max_comp_categ,least_comp_categ
0,28.0,30398,Yard Waste Pick-Up Request,311 INFORMATION ONLY CALL
1,34.0,17514,Yard Waste Pick-Up Request,Abandoned Vehicle Complaint
2,24.0,14216,Yard Waste Pick-Up Request,Abandoned Vehicle Complaint
3,27.0,13687,Yard Waste Pick-Up Request,311 INFORMATION ONLY CALL
4,9.0,13483,Yard Waste Pick-Up Request,Abandoned Vehicle Complaint
5,8.0,11646,Yard Waste Pick-Up Request,311 INFORMATION ONLY CALL
6,47.0,11078,Yard Waste Pick-Up Request,311 INFORMATION ONLY CALL
7,21.0,11008,Yard Waste Pick-Up Request,311 INFORMATION ONLY CALL
8,20.0,10751,Yard Waste Pick-Up Request,311 INFORMATION ONLY CALL
9,16.0,10718,Yard Waste Pick-Up Request,311 INFORMATION ONLY CALL


### 9. Fetch a table showing daily average complaints and daily maximum complaints received by each agency

In [26]:
c = conn.cursor()

# Execute the SQL query
c.execute('''
            WITH cte as
            (SELECT 
                responsibleagency as Agency
                ,COUNT(request_id) as Total_complaints
                ,CAST(julianday(substr(created_date, 1, 10)) AS date) as day
            FROM chicago_311_2018
            GROUP BY 1, 3)
            
            SELECT
                Agency
                , ROUND(AVG(Total_complaints)) as daily_avg_compl
                , MAX(Total_complaints) as daily_max_com
            FROM cte
            GROUP BY 1
            ORDER By 2 DESC
            ;

''')

# Fetch all the results
results = c.fetchall()

# Convert the results to a DataFrame
df = pd.DataFrame(results, columns=[desc[0] for desc in c.description])

# Display the DataFrame as a table
df

Unnamed: 0,Agency,daily_avg_compl,daily_max_com
0,Streets and Sanitation,1311.0,3284
1,311 City Services,1272.0,1705
2,CDOT - Department of Transportation,704.0,1411
3,Aviation,250.0,805
4,DWM - Department of Water Management,129.0,798
5,DOB - Buildings,101.0,201
6,Animal Care and Control,98.0,146
7,BACP - Business Affairs and Consumer Protection,34.0,62
8,Health,10.0,21
9,Department of Planning and Development,4.0,14


### 10. Create a table that presents the total number of complaints handled by each agency, along with the average time taken to address each complaint.

In [27]:
c = conn.cursor()

# Execute the SQL query
c.execute('''
            SELECT 
                 responsibleagency as Agency
                ,COUNT(request_id) as Total_complaints
                ,ROUND(AVG(CAST(julianday(substr(closed_date, 1, 10)) - julianday(substr(created_date, 1, 10)) AS INTEGER))) AS avg_completion_time_days

            FROM chicago_311_2018
            GROUP by 1
            ORDER BY 2 DESC
            
            ;

''')

# Fetch all the results
results = c.fetchall()

# Convert the results to a DataFrame
df = pd.DataFrame(results, columns=[desc[0] for desc in c.description])

# Display the DataFrame as a table
df

Unnamed: 0,Agency,Total_complaints,avg_completion_time_days
0,Streets and Sanitation,241150,32.0
1,CDOT - Department of Transportation,129527,35.0
2,DWM - Department of Water Management,23645,39.0
3,DOB - Buildings,18572,43.0
4,Animal Care and Control,18065,24.0
5,311 City Services,17812,0.0
6,BACP - Business Affairs and Consumer Protection,6190,62.0
7,Aviation,3498,0.0
8,Health,1821,32.0
9,Department of Planning and Development,715,3.0


### 11. Calculate the percentage of complaints closed within 7 days of submission for each borough

In [28]:

c = conn.cursor()

# Execute the SQL query
c.execute('''         

        SELECT ward_id,
               COUNT(CASE WHEN julianday(substr(closed_date, 1, 10)) - julianday(substr(created_date, 1, 10)) <= 7 THEN 1 END) AS complaints_closed_within_7_days,
               ROUND((COUNT(CASE WHEN julianday(substr(closed_date, 1, 10)) - julianday(substr(created_date, 1, 10)) <= 7 THEN 1 END) * 100.0 / COUNT(*)),2) AS percentage
               ,Rank() OVER(ORDER BY ROUND((COUNT(CASE WHEN julianday(substr(closed_date, 1, 10)) - julianday(substr(created_date, 1, 10)) <= 7 THEN 1 END) * 100.0 / COUNT(*)),2) DESC) AS Rank
        FROM chicago_311_2018
        GROUP BY ward_id
        ORDER BY percentage DESC;
            
          ''')

# Fetch all the results
results = c.fetchall()
# Convert the results to a DataFrame
df = pd.DataFrame(results, columns=[desc[0] for desc in c.description])

# Display the DataFrame as a table
df

Unnamed: 0,ward_id,complaints_closed_within_7_days,percentage,Rank
0,28.0,25859,85.07,1
1,,2769,81.3,2
2,24.0,10633,74.8,3
3,47.0,8201,74.03,4
4,4.0,5409,70.83,5
5,25.0,6366,70.65,6
6,14.0,6916,70.11,7
7,3.0,5648,69.6,8
8,27.0,9504,69.44,9
9,32.0,6923,68.89,10


### 12. Find the week with the highest number of complaints and the week with the lowest number of complaints.

In [29]:

c = conn.cursor()

# Execute the SQL query
c.execute('''         

        SELECT ward_id,
               COUNT(CASE WHEN julianday(substr(closed_date, 1, 10)) - julianday(substr(created_date, 1, 10)) <= 7 THEN 1 END) AS complaints_closed_within_7_days,
               ROUND((COUNT(CASE WHEN julianday(substr(closed_date, 1, 10)) - julianday(substr(created_date, 1, 10)) <= 7 THEN 1 END) * 100.0 / COUNT(*)),2) AS percentage
               ,Rank() OVER(ORDER BY ROUND((COUNT(CASE WHEN julianday(substr(closed_date, 1, 10)) - julianday(substr(created_date, 1, 10)) <= 7 THEN 1 END) * 100.0 / COUNT(*)),2) DESC) AS Rank
        FROM chicago_311_2018
        GROUP BY ward_id
        ORDER BY percentage DESC;
            
          ''')

# Fetch all the results
results = c.fetchall()
# Convert the results to a DataFrame
df = pd.DataFrame(results, columns=[desc[0] for desc in c.description])

# Display the DataFrame as a table
df

Unnamed: 0,ward_id,complaints_closed_within_7_days,percentage,Rank
0,28.0,25859,85.07,1
1,,2769,81.3,2
2,24.0,10633,74.8,3
3,47.0,8201,74.03,4
4,4.0,5409,70.83,5
5,25.0,6366,70.65,6
6,14.0,6916,70.11,7
7,3.0,5648,69.6,8
8,27.0,9504,69.44,9
9,32.0,6923,68.89,10


In [3]:


# Step 2: Read the CSV data
df_2019 = pd.read_csv('./chicago_311_2019.csv')


# Step 3: Connect to the SQLite database
db_file_path = './chicago_311_2018.db'
conn = sqlite3.connect(db_file_path)

# Step 4: Append data to the database
# Replace 'table_name' with the name of the table in the database where you want to append the data
# Replace 'if_exists' with 'append' to append the data to the existing table
# Replace 'index' with False to exclude the DataFrame index from being stored in the database
df_2019.to_sql('chicago_311_2018', conn, if_exists='append', index=False)

# Close the database connection
conn.close()


2075465

In [4]:

# c = conn.cursor()

# # Execute the SQL query
# c.execute('''         

#         SELECT *
#         FROM chicago_311_2018
# ;
            
#           ''')

# # Fetch all the results
# results = c.fetchall()
# # Convert the results to a DataFrame
# df = pd.DataFrame(results, columns=[desc[0] for desc in c.description])

# # Display the DataFrame as a table
# df

Unnamed: 0,request_id,category,category_code,responsibleagency,status,created_date,last_modified_date,closed_date,address,community_area,ward_id,police_district_id,precinct,latitude,longitude
0,SR19-01338060,311 INFORMATION ONLY CALL,311IOC,311 City Services,Completed,2019-04-16 01:08:00+00,2020-02-13 11:26:00+00,2019-04-16 01:08:00+00,2111 W Lexington ST,28.0,28.0,12.0,46.0,41.871831,-87.679846
1,SR19-01054667,Street Light Out Complaint,SFD,CDOT - Department of Transportation,Completed,2019-02-22 00:05:00+00,2020-02-13 10:21:00+00,2019-07-01 09:05:00+00,2705 W 69th ST,,,,,,
2,SR19-01338063,Pothole in Street Complaint,PHF,CDOT - Department of Transportation,Completed,2019-04-16 01:09:00+00,2020-02-13 11:26:00+00,2019-04-16 01:11:00+00,3141 W BELMONT AVE,21.0,33.0,14.0,13.0,41.939324,-87.706393
3,SR19-01338064,Sidewalk Inspection Request,PBS,CDOT - Department of Transportation,Completed,2019-04-16 01:09:00+00,2020-02-13 11:26:00+00,2020-01-02 03:59:00+00,2029 N HOYNE AVE,22.0,32.0,14.0,5.0,41.918602,-87.679823
4,SR19-01338066,Garbage Cart Maintenance,SIE,Streets and Sanitation,Completed,2019-04-16 01:09:00+00,2020-02-13 11:26:00+00,2019-04-25 00:25:00+00,1230 E 63RD ST,42.0,20.0,3.0,34.0,41.780770,-87.595001
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2075460,SR19-02910067,Tree Removal Request,SEE,Streets and Sanitation,Completed,2019-11-08 00:18:00+00,2020-03-30 00:21:00+00,2020-03-30 00:21:00+00,4137 N SPRINGFIELD AVE,16.0,39.0,17.0,28.0,41.956633,-87.725004
2075461,SR19-02970663,Tree Trim Request,SEF,Streets and Sanitation,Completed,2019-11-18 11:58:00+00,2020-03-30 00:47:00+00,2020-03-30 00:47:00+00,8159 W IRVING PARK RD,17.0,38.0,16.0,28.0,41.951859,-87.831624
2075462,SR19-02970668,Tree Trim Request,SEF,Streets and Sanitation,Completed,2019-11-18 11:59:00+00,2020-03-30 00:47:00+00,2020-03-30 00:47:00+00,8159 W IRVING PARK RD,17.0,38.0,16.0,28.0,41.951859,-87.831624
2075463,SR19-03071226,Tree Removal Request,SEE,Streets and Sanitation,Completed,2019-12-04 07:55:00+00,2020-03-30 00:42:00+00,2020-03-30 00:42:00+00,5831 N CENTRAL AVE,11.0,39.0,16.0,24.0,41.986807,-87.767953


In [5]:
from datetime import datetime as dt

In [26]:
df["created_date"].dtype

dtype('O')

In [34]:
df["created_date"] = pd.to_datetime(df["created_date"]) 


In [36]:
df["created_date"].max()

Timestamp('2019-12-31 11:59:00+0000', tz='UTC')

In [39]:
c = conn.cursor()

# Execute the SQL query
c.execute('''
            SELECT category as Complaint_category
            ,COUNT(category) as Counts
            ,Rank() over (ORDER BY COUNT(category) DESC) AS Rank
            FROM chicago_311_2018
            WHERE strftime('%Y', julianday(substr(closed_date, 1, 10))) = "2019"
            GROUP BY category
            ORDER BY 2 DESC
            LIMIT 10
          ''')

# Fetch all the results
results = c.fetchall()
# Convert the results to a DataFrame
df = pd.DataFrame(results, columns=[desc[0] for desc in c.description])

# Display the DataFrame as a table
df

Unnamed: 0,Complaint_category,Counts,Rank
0,311 INFORMATION ONLY CALL,647798,1
1,Aircraft Noise Complaint,413963,2
2,Street Light Out Complaint,99451,3
3,Graffiti Removal Request,92589,4
4,Pothole in Street Complaint,90981,5
5,Weed Removal Request,85827,6
6,Rodent Baiting/Rat Complaint,50829,7
7,Garbage Cart Maintenance,49761,8
8,Sign Repair Request - All Other Signs,33589,9
9,Abandoned Vehicle Complaint,27780,10


In [31]:
c = conn.cursor()

# Execute the SQL query
c.execute('''
            SELECT category as Complaint_category
            ,COUNT(category) as Counts
            ,Rank() over (ORDER BY COUNT(category) DESC) AS Rank
            FROM chicago_311_2019
            WHERE strftime('%Y', julianday(substr(closed_date, 1, 10))) = "2019"
            GROUP BY category
            ORDER BY 2 DESC
            LIMIT 10
          ''')

# Fetch all the results
results = c.fetchall()
# Convert the results to a DataFrame
df = pd.DataFrame(results, columns=[desc[0] for desc in c.description])

# Display the DataFrame as a table
df

Unnamed: 0,Complaint_category,Counts,Rank
0,311 INFORMATION ONLY CALL,647798,1
1,Aircraft Noise Complaint,413963,2
2,Street Light Out Complaint,99451,3
3,Graffiti Removal Request,92589,4
4,Pothole in Street Complaint,90981,5
5,Weed Removal Request,85827,6
6,Rodent Baiting/Rat Complaint,50829,7
7,Garbage Cart Maintenance,49761,8
8,Sign Repair Request - All Other Signs,33589,9
9,Abandoned Vehicle Complaint,27780,10


In [30]:
# loading the data into a Pandas DataFrame
chicago_311_2019 = pd.read_csv('./chicago_311_2019.csv')
# writing the data to a sqlite table
chicago_311_2019.to_sql('chicago_311_2019', conn, if_exists='append', index = False)

2075465

In [32]:
c = conn.cursor()

# Execute the SQL query
c.execute('''
            SELECT category as Complaint_category
            ,COUNT(category) as Counts
            ,Rank() over (ORDER BY COUNT(category) DESC) AS Rank
            FROM chicago_311_2018
            WHERE strftime('%Y', julianday(substr(closed_date, 1, 10))) = "2019"
            GROUP BY category
            ORDER BY 2 DESC
            LIMIT 10
          ''')

# Fetch all the results
results = c.fetchall()
# Convert the results to a DataFrame
df = pd.DataFrame(results, columns=[desc[0] for desc in c.description])

# Display the DataFrame as a table
df

Unnamed: 0,Complaint_category,Counts,Rank
0,Tree Trim Request,10053,1
1,Pothole in Street Complaint,7894,2
2,Tree Removal Request,7278,3
3,Street Light Out Complaint,6508,4
4,Garbage Cart Maintenance,5421,5
5,Alley Light Out Complaint,5087,6
6,Building Violation,2139,7
7,Sign Repair Request - All Other Signs,1886,8
8,Alley Pothole Complaint,1817,9
9,Sidewalk Inspection Request,1310,10


In [34]:
c = conn.cursor()

# Execute the SQL query
c.execute('''
            SELECT *           
            FROM chicago_311_2018
          ''')

# Fetch all the results
results = c.fetchall()
# Convert the results to a DataFrame
df_2018 = pd.DataFrame(results, columns=[desc[0] for desc in c.description])

# Display the DataFrame as a table
df_2018

Unnamed: 0,request_id,category,category_code,responsibleagency,status,created_date,last_modified_date,closed_date,address,community_area,ward_id,police_district_id,precinct,latitude,longitude
0,SR19-00332102,Tree Trim Request,SEF,Streets and Sanitation,Completed,2018-10-15 04:45:00+00,2020-02-13 07:19:00+00,2018-10-22 01:08:00+00,"1111 W ARDMORE AVE, CHICAGO, IL 60660",77.0,48.0,20.0,41.0,41.987279,-87.658649
1,SR18-00192004,Building Violation,BBA,DOB - Buildings,Completed,2018-12-18 09:49:00+00,2020-02-13 05:43:00+00,2019-03-05 11:14:00+00,,,,,,,
2,SR18-00194556,Aircraft Noise Complaint,AVN,Aviation,Completed,2018-12-18 08:40:00+00,2020-02-13 05:44:00+00,2018-12-18 08:40:00+00,,,,,,,
3,SR19-00332104,Vehicle Parked in Bike Lane Complaint,VBL,CDOT - Department of Transportation,Completed,2018-10-15 04:45:00+00,2020-02-13 07:19:00+00,2018-10-15 06:45:00+00,"123 N FRANKLIN ST, CHICAGO, IL 60606",32.0,42.0,1.0,9.0,41.883782,-87.635328
4,SR18-00233228,Aircraft Noise Complaint,AVN,Aviation,Completed,2018-12-25 10:52:00+00,2020-02-13 06:04:00+00,2018-12-25 10:52:00+00,8 NE R CRES,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
461165,SR18-00116944,Sidewalk Inspection Request,PBS,CDOT - Department of Transportation,Completed,2018-09-05 03:03:00+00,2020-04-22 09:19:00+00,2020-04-22 09:19:00+00,"6131 S MENARD AVE, CHICAGO, IL 60638",64.0,13.0,8.0,17.0,41.780670,-87.766417
461166,SR18-00098412,Tree Planting Request,SED,Streets and Sanitation,Completed,2018-07-30 09:31:00+00,2020-04-22 09:38:00+00,2020-04-22 09:38:00+00,"9443 S WESTERN AVE, CHICAGO, IL 60643",72.0,19.0,22.0,5.0,41.721494,-87.682052
461167,SR18-00098413,Tree Planting Request,SED,Streets and Sanitation,Completed,2018-07-30 09:31:00+00,2020-04-22 09:32:00+00,2020-04-22 09:32:00+00,"9617 S WESTERN AVE, CHICAGO, IL 60643",72.0,19.0,22.0,2.0,41.718595,-87.681524
461168,SR18-00098414,Tree Planting Request,SED,Streets and Sanitation,Completed,2018-07-30 09:32:00+00,2020-04-22 09:24:00+00,2020-04-22 09:24:00+00,"9707 S WESTERN AVE, CHICAGO, IL 60643",72.0,19.0,22.0,2.0,41.717052,-87.681918


In [35]:
c = conn.cursor()

# Execute the SQL query
c.execute('''
            SELECT *           
            FROM chicago_311_2019
          ''')

# Fetch all the results
results = c.fetchall()
# Convert the results to a DataFrame
df_2019 = pd.DataFrame(results, columns=[desc[0] for desc in c.description])

# Display the DataFrame as a table
df_2019

Unnamed: 0,request_id,category,category_code,responsibleagency,status,created_date,last_modified_date,closed_date,address,community_area,ward_id,police_district_id,precinct,latitude,longitude
0,SR19-01338060,311 INFORMATION ONLY CALL,311IOC,311 City Services,Completed,2019-04-16 01:08:00+00,2020-02-13 11:26:00+00,2019-04-16 01:08:00+00,2111 W Lexington ST,28.0,28.0,12.0,46.0,41.871831,-87.679846
1,SR19-01054667,Street Light Out Complaint,SFD,CDOT - Department of Transportation,Completed,2019-02-22 00:05:00+00,2020-02-13 10:21:00+00,2019-07-01 09:05:00+00,2705 W 69th ST,,,,,,
2,SR19-01338063,Pothole in Street Complaint,PHF,CDOT - Department of Transportation,Completed,2019-04-16 01:09:00+00,2020-02-13 11:26:00+00,2019-04-16 01:11:00+00,3141 W BELMONT AVE,21.0,33.0,14.0,13.0,41.939324,-87.706393
3,SR19-01338064,Sidewalk Inspection Request,PBS,CDOT - Department of Transportation,Completed,2019-04-16 01:09:00+00,2020-02-13 11:26:00+00,2020-01-02 03:59:00+00,2029 N HOYNE AVE,22.0,32.0,14.0,5.0,41.918602,-87.679823
4,SR19-01338066,Garbage Cart Maintenance,SIE,Streets and Sanitation,Completed,2019-04-16 01:09:00+00,2020-02-13 11:26:00+00,2019-04-25 00:25:00+00,1230 E 63RD ST,42.0,20.0,3.0,34.0,41.780770,-87.595001
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2075460,SR19-02910067,Tree Removal Request,SEE,Streets and Sanitation,Completed,2019-11-08 00:18:00+00,2020-03-30 00:21:00+00,2020-03-30 00:21:00+00,4137 N SPRINGFIELD AVE,16.0,39.0,17.0,28.0,41.956633,-87.725004
2075461,SR19-02970663,Tree Trim Request,SEF,Streets and Sanitation,Completed,2019-11-18 11:58:00+00,2020-03-30 00:47:00+00,2020-03-30 00:47:00+00,8159 W IRVING PARK RD,17.0,38.0,16.0,28.0,41.951859,-87.831624
2075462,SR19-02970668,Tree Trim Request,SEF,Streets and Sanitation,Completed,2019-11-18 11:59:00+00,2020-03-30 00:47:00+00,2020-03-30 00:47:00+00,8159 W IRVING PARK RD,17.0,38.0,16.0,28.0,41.951859,-87.831624
2075463,SR19-03071226,Tree Removal Request,SEE,Streets and Sanitation,Completed,2019-12-04 07:55:00+00,2020-03-30 00:42:00+00,2020-03-30 00:42:00+00,5831 N CENTRAL AVE,11.0,39.0,16.0,24.0,41.986807,-87.767953


In [36]:
c = conn.cursor()

# Execute the SQL query
c.execute('''
            SELECT DISTINCT(category) as Complaint_category
            FROM chicago_311_2018
          ''')

# Fetch all the results
results = c.fetchall()
# Convert the results to a DataFrame
df = pd.DataFrame(results, columns=[desc[0] for desc in c.description])

# Display the DataFrame as a table
df

Unnamed: 0,Complaint_category
0,Tree Trim Request
1,Building Violation
2,Aircraft Noise Complaint
3,Vehicle Parked in Bike Lane Complaint
4,Cab Feedback
...,...
87,Inaccurate Retail Scales Complaint
88,Smokeless Tobacco at Sports Event Complaint
89,Ridesharing Complaint
90,Petcoke Dust Complaint
