In [None]:
import pandas as pd

# Load the raw export. The first 4 lines of the file are skipped, so pandas
# treats the next line as the header row (it is overwritten below).
data = pd.read_csv('/content/drive/MyDrive/ezajcs_demo.csv', delimiter=",", quotechar='"', skiprows=4)

# Report how many columns were actually parsed
num_columns = len(data.columns)
print(f"Number of columns in the dataset: {num_columns}")

# Canonical column names, defined once so the rename and the numeric
# conversion below cannot drift apart.
expected_columns = [
    "Year", "Total", "12 & under", "13 to 15", "16", "17 & over",
    "Male", "Female", "White", "Minority", "Black",
    "American Indian", "Asian/NHPI", "Hispanic"
]

if num_columns == len(expected_columns) + 1:
    # One surplus trailing column: name it "Extra" so it can be dropped by name.
    data.columns = expected_columns + ["Extra"]
    data_cleaned = data.drop(columns=["Extra"])
elif num_columns == len(expected_columns):
    # Exactly the expected layout.
    data.columns = expected_columns
    data_cleaned = data
else:
    # Everything below addresses columns by the canonical names, so continuing
    # with an unknown layout would only fail later with a less helpful error.
    raise ValueError(
        "Unexpected number of columns in the dataset: "
        f"expected {len(expected_columns)} or {len(expected_columns) + 1}, "
        f"got {num_columns}."
    )

# Drop rows where the 'Hispanic' column has zero values.
# NOTE(review): this comparison runs before the numeric conversion further
# down; if the column is still text at this point, a "0" string does not equal
# the integer 0 and no row is removed -- confirm against the raw file.
data_cleaned = data_cleaned[data_cleaned['Hispanic'] != 0]

# Drop the rows at positions 37 through 40 (inclusive) of the filtered frame,
# as they contain metadata or notes. The slice is positional; the labels it
# yields are then dropped. If the frame has fewer rows, nothing is dropped.
rows_to_remove_metadata = data_cleaned.index[37:41]
# .copy() makes data_cleaned an independent frame, so the column assignment
# below is not a write into a slice of `data`.
data_cleaned = data_cleaned.drop(rows_to_remove_metadata).copy()

# Every column except "Year" is cleaned and converted to a numeric dtype.
numeric_columns = [name for name in expected_columns if name != "Year"]

# Strip thousands separators from string cells, then convert to numbers;
# anything that still cannot be parsed becomes NaN (errors='coerce').
data_cleaned[numeric_columns] = (
    data_cleaned[numeric_columns]
    .replace({',': ''}, regex=True)
    .apply(pd.to_numeric, errors='coerce')
)

# Display the first rows of the cleaned data. The display limits are lifted
# only for this print instead of being changed for the whole session.
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    print(data_cleaned.head())


Number of columns in the dataset: 15
   Year   Total  12 & under  13 to 15      16  17 & over    Male  Female  \
0  2021  437300       32300    197500  108200      99400  320800  116500   
1  2020  504800       38300    226600  123300     116600  369200  135600   
2  2019  711900       60900    326800  169200     155000  513700  198200   
3  2018  737200       62400    328700  179900     166100  535600  201500   
4  2017  782700       62600    345800  195900     178400  571600  211100   

    White  Minority   Black  American Indian  Asian/NHPI  Hispanic  
0  194300    243100  154200             9100        4900     74900  
1  219200    285600  175100            10500        6000     94000  
2  303300    408600  251700            13300        8400    135200  
3  324100    413100  253900            13200        8400    137600  
4  345100    437700  272900            13400        8500    142900  


In [None]:
# Destination for the cleaned dataset (Google Drive mount)
cleaned_file_path = '/content/drive/MyDrive/ezajcs_cleaned.csv'

# Write data_cleaned as CSV without the index column, then report where the
# file was written.
data_cleaned.to_csv(path_or_buf=cleaned_file_path, index=False)
print(f"Cleaned data saved to: {cleaned_file_path}")


Cleaned data saved to: /content/drive/MyDrive/ezajcs_cleaned.csv


In [4]:
# @title Tableau Dashboard

from IPython.display import HTML

# Embed markup for the published Tableau Public dashboard
# (JuvenileDelinquencyChart / Dashboard1). It consists of a placeholder <div>
# holding the viz <object>, followed by a script that sizes the viz from the
# placeholder's width and loads Tableau's viz_v1.js. The text is kept verbatim.
tableau_embed_markup = """
<div class='tableauPlaceholder' id='viz1724960070319' style='position: relative'><noscript><a href='#'><img alt=' ' src='https:&#47;&#47;public.tableau.com&#47;static&#47;images&#47;Ju&#47;JuvenileDelinquencyChart&#47;Dashboard1&#47;1_rss.png' style='border: none' /></a></noscript><object class='tableauViz'  style='display:none;'><param name='host_url' value='https%3A%2F%2Fpublic.tableau.com%2F' /> <param name='embed_code_version' value='3' /> <param name='site_root' value='' /><param name='name' value='JuvenileDelinquencyChart&#47;Dashboard1' /><param name='tabs' value='yes' /><param name='toolbar' value='yes' /><param name='static_image' value='https:&#47;&#47;public.tableau.com&#47;static&#47;images&#47;Ju&#47;JuvenileDelinquencyChart&#47;Dashboard1&#47;1.png' /> <param name='animate_transition' value='yes' /><param name='display_static_image' value='yes' /><param name='display_spinner' value='yes' /><param name='display_overlay' value='yes' /><param name='display_count' value='yes' /><param name='language' value='en-US' /></object></div>                <script type='text/javascript'>                    var divElement = document.getElementById('viz1724960070319');                    var vizElement = divElement.getElementsByTagName('object')[0];                    if ( divElement.offsetWidth > 800 ) { vizElement.style.minWidth='1024px';vizElement.style.maxWidth='100%';vizElement.style.minHeight='818px';vizElement.style.maxHeight=(divElement.offsetWidth*0.75)+'px';} else if ( divElement.offsetWidth > 500 ) { vizElement.style.minWidth='1024px';vizElement.style.maxWidth='100%';vizElement.style.minHeight='818px';vizElement.style.maxHeight=(divElement.offsetWidth*0.75)+'px';} else { vizElement.style.width='100%';vizElement.style.minHeight='1250px';vizElement.style.maxHeight=(divElement.offsetWidth*1.77)+'px';}                     var scriptElement = document.createElement('script');                    scriptElement.src = 
'https://public.tableau.com/javascripts/api/viz_v1.js';                    vizElement.parentNode.insertBefore(scriptElement, vizElement);                </script>
"""

# Last expression of the cell, so the notebook renders the HTML object.
HTML(tableau_embed_markup)