<a href="https://colab.research.google.com/github/rixprakash/DS2002F24/blob/main/project1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Data collected from https://opendata.charlottesville.org/

In [1]:
! git clone https://github.com/rixprakash/DS2002F24

Cloning into 'DS2002F24'...
remote: Enumerating objects: 372, done.[K
remote: Counting objects: 100% (143/143), done.[K
remote: Compressing objects: 100% (67/67), done.[K
remote: Total 372 (delta 128), reused 76 (delta 76), pack-reused 229 (from 1)[K
Receiving objects: 100% (372/372), 17.61 MiB | 11.68 MiB/s, done.
Resolving deltas: 100% (177/177), done.


In [2]:
import pandas as pd
import json
import sqlite3
import os

# Load the crime CSV from the DS2002F24 repository checkout (absolute path under /content)
file_path = '/content/DS2002F24/dataproject1/Crime_Data.csv'
df = pd.read_csv(file_path)
dfbefore = df.copy() # Independent copy of the data as loaded; main() summarizes it as the "pre-modified" state

# Function to add the 'ExactDateReport' column (date part of 'DateReported')
def add_exact_date_report_column(df):
    """Add an 'ExactDateReport' column with the first 10 characters of 'DateReported'.

    The frame is modified in place and the same object is returned. Any
    exception raised while building the column is printed instead of being
    propagated, and the frame is then returned without the new column.

    Args:
        df: DataFrame expected to contain a string column 'DateReported'.

    Returns:
        The same DataFrame object that was passed in.
    """
    try:
        reported = df['DateReported']
        # Keep only the leading 10 characters (the date part of the value)
        df['ExactDateReport'] = reported.str.slice(stop=10)
    except Exception as err:
        print(f"Error adding 'ExactDateReport' column: {err}")
    return df

# Function to split Officer Name
def split_reporting_officer(df):
    """Split 'ReportingOfficer' ("Last, First") into two new columns.

    Adds 'OfficerLastName' and 'OfficerFirstName' to ``df`` in place and
    returns the same object. Only the first ", " is used as the separator,
    so a value such as "Doe, Jane, Jr" yields last name "Doe" and first name
    "Jane, Jr". A value with no ", " yields the whole text as the last name
    and a missing first name.

    Any exception (for example a missing 'ReportingOfficer' column) is
    printed instead of being propagated, and the frame is returned without
    the new columns.

    Args:
        df: DataFrame expected to contain a string column 'ReportingOfficer'.

    Returns:
        The same DataFrame object that was passed in.
    """
    try:
        # n=1 caps the result at two pieces; without it a single name containing a
        # second ", " makes the split three columns wide and the assignment fails.
        parts = df['ReportingOfficer'].str.split(', ', n=1, expand=True)
        # Guarantee exactly two columns even when no value contains a comma
        # (the split would otherwise produce a single column).
        parts = parts.reindex(columns=[0, 1])
        df['OfficerLastName'] = parts[0]
        df['OfficerFirstName'] = parts[1]
    except Exception as e:
        print(f"Error splitting 'ReportingOfficer' into 'OfficerFirstName' and 'OfficerLastName': {e}") # If an error occurs
    return df

# Function to convert CSV file to JSON
def convert_csv_to_json(df, file_path):
    """Write ``df`` as a JSON array of records next to ``file_path``.

    The output path is ``file_path`` with its final extension replaced by
    ``.json``. Errors are printed instead of being propagated.

    Args:
        df: DataFrame to serialize (``orient='records'``).
        file_path: Path of the source CSV; only used to derive the output path.

    Returns:
        None.
    """
    try:
        json_data = df.to_json(orient='records')
        # Swap only the trailing extension. A plain str.replace('.csv', '.json')
        # would also rewrite '.csv' inside directory names, and would leave the
        # path unchanged (overwriting the source) for names such as 'DATA.CSV'.
        json_file_path = os.path.splitext(file_path)[0] + '.json'
        with open(json_file_path, 'w', encoding='utf-8') as json_file:
            json_file.write(json_data)
        print(f"CSV file converted to JSON and saved as {json_file_path}")
    except Exception as e:
        print(f"Error converting CSV to JSON: {e}")

# Function to convert CSV file to SQL
def convert_csv_to_sql(df, file_path, db_name='crime_data.db', table_name='crime_records'):
    """Store ``df`` as a table in an SQLite database file.

    An existing table with the same name is replaced. Errors are printed
    instead of being propagated.

    Args:
        df: DataFrame to store.
        file_path: Unused by this function; kept so existing callers keep working.
        db_name: Path of the SQLite database file (created if it doesn't exist).
        table_name: Name of the table to write.

    Returns:
        None.
    """
    try:
        # Connect to SQLite database (or create it if it doesn't exist)
        conn = sqlite3.connect(db_name)
        try:
            # Write the dataframe to the SQL database, replacing any existing table
            df.to_sql(table_name, conn, if_exists='replace', index=False)
        finally:
            # Close the connection even if the write fails, so the handle is not leaked
            conn.close()
        print(f"CSV file converted to SQL and saved in {db_name} database, table: {table_name}")
    except Exception as e:
        print(f"Error converting CSV to SQL: {e}")

# Function to summarize data
def summarize_data(df):
    """Print the row count, column count and column names of ``df``.

    The last line printed ends with an extra newline, so a blank line
    follows the summary.
    """
    column_names = list(df.columns)
    summary_lines = (
        f"Number of records: {len(df)}",
        f"Number of columns: {len(column_names)}",
        f"Columns: {column_names}\n",
    )
    for line in summary_lines:
        print(line)

# Main function to let the user choose
def main():
    """Transform the crime data, export it in the chosen format, and print summaries.

    Steps:
      1. Add the 'ExactDateReport' column and split 'ReportingOfficer' on the
         module-level ``df``.
      2. Ask the user for 'JSON' or 'SQL' and export the modified data.
      3. Print a summary of the data as loaded (``dfbefore``) and as modified.

    Reads the module-level ``df``, ``dfbefore`` and ``file_path``.
    """
    df_modified = add_exact_date_report_column(df)
    df_modified = split_reporting_officer(df_modified)
    choice = input("Do you want to convert the CSV file to JSON or SQL Database? Enter 'JSON' or 'SQL': ")
    # Ignore letter case and surrounding whitespace so inputs such as 'json' or ' SQL ' are accepted
    normalized_choice = choice.strip().upper()

    # Export the modified frame explicitly instead of relying on the helpers
    # having changed the global ``df`` in place.
    if normalized_choice == 'JSON':
        convert_csv_to_json(df_modified, file_path)
    elif normalized_choice == 'SQL':
        convert_csv_to_sql(df_modified, file_path)
    else:
        print("Invalid choice! Please enter 'JSON' or 'SQL'.") # Prompting user to put only these two options

    print("Pre-modified Data Summary:")
    summarize_data(dfbefore)
    print("Post-modified Summary:")
    summarize_data(df_modified)


# Run the interactive workflow when this code is executed as the top-level program
if __name__ == "__main__":
    main()


Do you want to convert the CSV file to JSON or SQL Database? Enter 'JSON' or 'SQL': JSON
CSV file converted to JSON and saved as /content/DS2002F24/dataproject1/Crime_Data.json
Pre-modified Data Summary:
Number of records: 25111
Number of columns: 9
Columns: ['RecordID', 'Offense', 'IncidentID', 'BlockNumber', 'StreetName', 'Agency', 'DateReported', 'HourReported', 'ReportingOfficer']

Post-modified Summary:
Number of records: 25111
Number of columns: 12
Columns: ['RecordID', 'Offense', 'IncidentID', 'BlockNumber', 'StreetName', 'Agency', 'DateReported', 'HourReported', 'ReportingOfficer', 'ExactDateReport', 'OfficerLastName', 'OfficerFirstName']



In [3]:
import pandas as pd
import json
import sqlite3
import os

# Set file path (hardcoded) to the sports car price JSON in the DS2002F24 checkout.
# NOTE(review): this rebinds the same module-level `file_path` name that the
# crime-data cell assigns, so that cell's main() would read this path if re-run
# afterwards — confirm that is acceptable.
file_path = '/content/DS2002F24/dataproject1/sportscarprice.json'

# Function to delete 'Engine Size (L)' column from a JSON file
def delete_engine_size_column_json(file_path):
    """Load a JSON file, drop its 'Engine Size (L)' column and save the result.

    The updated data is written next to the input as
    ``<name>_updated<extension>`` (for example ``cars.json`` becomes
    ``cars_updated.json``); the input file itself is not modified. Record and
    column counts are printed before and after the column is dropped.

    Args:
        file_path: Path of the JSON file to read; its content must be
            something ``pd.DataFrame`` accepts (e.g. a list of records).

    Returns:
        The modified DataFrame, or None if any step failed (the error is
        printed instead of being propagated).
    """
    try:
        # Load the JSON file into a DataFrame
        with open(file_path, 'r', encoding='utf-8') as json_file:
            json_data = json.load(json_file)
        df = pd.DataFrame(json_data) # Convert JSON data to a DataFrame for easier manipulation

        # Summary before processing (Ingestion Summary)
        print("Ingestion Summary:")
        print(f"Number of records: {len(df)}")
        print(f"Number of columns: {len(df.columns)}")

        # Drop the Engine Size (L) column
        df = df.drop(columns=['Engine Size (L)'])

        # Insert '_updated' before the final extension only. A plain
        # str.replace('.json', ...) would also rewrite '.json' inside directory
        # names, and would leave the path unchanged (overwriting the input)
        # when the name has no lowercase '.json' in it.
        root, extension = os.path.splitext(file_path)
        new_file_path = f"{root}_updated{extension}"
        df.to_json(new_file_path, orient='records', indent=4)

        # Summary after processing (Post-Processing Summary)
        print("\nPost-Processing Summary:")
        print(f"Number of records: {len(df)}")
        print(f"Number of columns: {len(df.columns)}")

        return df  # Return the modified DataFrame (optional)

    except Exception as e:
        print(f"Error processing JSON file: {e}")
        return None

# Function to convert DataFrame to CSV
def convert_json_to_csv(df, file_path):
    """Write ``df`` as a CSV file (no index column) next to ``file_path``.

    The output path is ``file_path`` with its final extension replaced by
    ``.csv``. Errors are printed instead of being propagated.

    Args:
        df: DataFrame to write.
        file_path: Path of the source JSON; only used to derive the output path.

    Returns:
        None.
    """
    try:
        # Swap only the trailing extension. A plain str.replace('.json', '.csv')
        # would also rewrite '.json' inside directory names.
        csv_file_path = os.path.splitext(file_path)[0] + '.csv'
        df.to_csv(csv_file_path, index=False)
        print(f"JSON file converted to CSV and saved as {csv_file_path}")
    except Exception as e:
        print(f"Error converting JSON to CSV: {e}")

# Function to convert DataFrame to SQL
def convert_json_to_sql(df, file_path, db_name='json_data.db', table_name='json_records'):
    """Store ``df`` as a table in an SQLite database file.

    An existing table with the same name is replaced. Errors are printed
    instead of being propagated.

    Args:
        df: DataFrame to store.
        file_path: Unused by this function; kept so existing callers keep working.
        db_name: Path of the SQLite database file (created if it doesn't exist).
        table_name: Name of the table to write.

    Returns:
        None.
    """
    try:
        # Connecting to the SQLite database
        conn = sqlite3.connect(db_name)
        try:
            # Write DataFrame to SQL table, replace if exists
            df.to_sql(table_name, conn, if_exists='replace', index=False)
        finally:
            # Close the connection even if the write fails, so the handle is not leaked
            conn.close()
        print(f"JSON file converted to SQL and saved in {db_name} database, table: {table_name}")
    except Exception as e:
        print(f"Error converting JSON to SQL: {e}")

# Main function to allow user to choose conversion type
def main():
    """Drop the engine-size column, then export the result as CSV or SQL.

    Reads the module-level ``file_path``. If the JSON could not be processed,
    a message is printed and nothing is exported. Otherwise the user is asked
    for 'CSV' or 'SQL' (letter case is ignored) and the matching converter is
    called with the modified DataFrame.
    """
    # Delete Engine Size (L) column
    df_modified = delete_engine_size_column_json(file_path)

    # Guard clause: nothing to convert when processing failed
    if df_modified is None:
        print("Unable to load the JSON file. Please check the file format.")
        return

    choice = input("Do you want to convert the modified JSON file to CSV or SQL Database? Enter 'CSV' or 'SQL': ")
    selected_format = choice.upper()  # compare case-insensitively

    if selected_format == 'CSV':
        convert_json_to_csv(df_modified, file_path)
    elif selected_format == 'SQL':
        convert_json_to_sql(df_modified, file_path)
    else:
        print("Invalid choice! Please enter 'CSV' or 'SQL'.")

# Run the interactive workflow when this code is executed as the top-level program
if __name__ == "__main__":
    main()


Ingestion Summary:
Number of records: 1007
Number of columns: 8

Post-Processing Summary:
Number of records: 1007
Number of columns: 7
Do you want to convert the modified JSON file to CSV or SQL Database? Enter 'CSV' or 'SQL': CSV
JSON file converted to CSV and saved as /content/DS2002F24/dataproject1/sportscarprice.csv
