In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import missingno as mn
import gradio as gr
import seaborn as sns
import plotly.express as px

In [None]:
# Load the raw Los Angeles crime records from the CSV in the Colab working directory.
csv_path = "/content/Los_Angeles_Crime.csv"
data = pd.read_csv(csv_path)

In [None]:
# Preview the first rows to sanity-check that the CSV parsed as expected.
data.head()

In [None]:
# Summarize the columns: dtype and non-null count for each.
data.info()

In [None]:
# Frequency of each victim age. The call parentheses are required: without
# them the cell only displays the bound method object, not the counts.
data['VICTIM_AGE'].value_counts()

In [None]:
# Visualize where values are missing across all columns (missingno nullity matrix).
mn.matrix(data)
plt.show()

In [None]:
# Bar chart of victim age against crime description.
# One bar is drawn per row of `data`; rows sharing a crime description overlap
# at the same x position.
fig, ax = plt.subplots(figsize=(20, 20))
ax.bar(data['CRIME_CD_DESC'], data['VICTIM_AGE'], color='blue', width=0.4)

# Crime descriptions are long, so the tick labels are rotated on both axes.
ax.tick_params(axis='x', labelrotation=90)
ax.tick_params(axis='y', labelrotation=90)

ax.set_xlabel('Crime Description')
ax.set_ylabel('Victim Age')
ax.set_title('Crime Description vs Victim Age')

# tight_layout must run after the labels and title are set; otherwise the
# layout is computed without them and they can be clipped or overlap.
fig.tight_layout()
plt.show()

In [None]:
# Pairwise relationships between columns as plain scatter plots
# (kind="scatter", i.e. no regression line is fitted).
sns.pairplot(data, kind="scatter")
plt.show()

In [None]:
def no_crime_by_area(data):
    """
    Show a pie chart of the ten areas with the most recorded crimes.

    Rows of DATA are grouped by 'AREA_NAME' and the non-null 'RECORD_NUMBER'
    entries are counted per area; only the ten largest counts are plotted.
    The figure is displayed and nothing is returned.
    """
    top_areas = (
        data.groupby('AREA_NAME')['RECORD_NUMBER']
        .count()
        .to_frame()
        .reset_index()
        .sort_values(by='RECORD_NUMBER', ascending=False)
        .head(10)
    )
    pie = px.pie(
        top_areas,
        values='RECORD_NUMBER',
        names='AREA_NAME',
        title='Number Of Crimes By Area',
    )
    pie.show()


no_crime_by_area(data)

In [None]:
# List the column names available in the dataset.
print(data.columns)

In [None]:
def period_count(data, period, year=None, crime='all crimes', order=None):
    '''
    Draw a count plot of how many rows of DATA fall into each value of column PERIOD.

    YEAR and CRIME are used only to build the figure title; they do not filter DATA.
    ORDER is passed to seaborn as the order of the x categories.
    The figure is shown; nothing is returned.
    '''
    if year:
        heading = "Number of crimes occured in a particular {} in {} for {}.".format(period, year, crime)
    else:
        heading = "Number of crimes occured in a particular {} for {}.".format(period, crime)

    plt.figure(figsize=(15, 5))
    plt.title(heading)
    sns.countplot(x=period, data=data, order=order)
    plt.show()

In [None]:
def make_df_date(data):
    '''
    Build a date-parts dataframe from the 'DATE_OCCURRENCE' column of DATA.

    The result has columns date, year, month, day and weekday,
    where weekday runs from 1 (Monday) to 7 (Sunday).
    '''
    occurred = pd.to_datetime(data['DATE_OCCURRENCE'])
    parts = occurred.dt
    return pd.DataFrame({
        'date': occurred,
        'year': parts.year,
        'month': parts.month,
        'day': parts.day,
        'weekday': parts.weekday + 1,  # shift pandas' 0..6 to 1..7
    })

def make_df_date_period(df):
    '''
    Build a date-parts dataframe from the 'DATE_OCCURRENCE' column of DF,
    plus cyclical encodings of month, day-of-month and weekday.

    Returned columns:
        date, year, month, day, weekday (1=Monday .. 7=Sunday),
        sin_month / cos_month     -- month mapped onto a 12-step circle,
        sin_day / cos_day         -- day mapped onto a circle whose length is
                                     the number of days in that date's month,
        sin_weekday / cos_weekday -- weekday mapped onto a 7-step circle.

    The sin/cos pairs make the ends of each cycle (e.g. December and January)
    numerically close, which can help model performance.
    '''
    date = pd.to_datetime(df['DATE_OCCURRENCE'])
    df_date = pd.DataFrame({
        'date': date,
        'year': date.dt.year,
        'month': date.dt.month,
        'day': date.dt.day,
        'weekday': date.dt.weekday + 1,  # Monday=1, Sunday=7
    })

    two_pi = 2 * np.pi

    month_angle = two_pi * df_date['month'] / 12
    df_date['sin_month'] = np.sin(month_angle)
    df_date['cos_month'] = np.cos(month_angle)

    # Vectorised month length; avoids building one pd.Period per row in Python.
    day_percent = df_date['day'] / date.dt.days_in_month
    day_angle = two_pi * day_percent
    df_date['sin_day'] = np.sin(day_angle)
    df_date['cos_day'] = np.cos(day_angle)

    weekday_angle = two_pi * df_date['weekday'] / 7
    df_date['sin_weekday'] = np.sin(weekday_angle)
    df_date['cos_weekday'] = np.cos(weekday_angle)

    return df_date

In [None]:
# Derive the date-part frames from the occurrence dates:
# df_date has the plain parts, df_date_period adds the sin/cos cyclical features.
df_date = make_df_date(data)
df_date_period = make_df_date_period(data)

In [None]:
# Crime counts per year, with the x axis fixed to the years 2018 through 2021.
period_count(df_date, 'year', order=range(2018,2022))

In [None]:
# Line plot of how many crimes occurred on each date from 2020 onwards.
recent_dates = df_date.loc[df_date['year'] >= 2020, 'date']
crimes_per_day = recent_dates.value_counts()

plt.figure(figsize=(15, 5))
crimes_per_day.plot()
plt.title('Number of crimes per day')
plt.xlabel('year')
plt.ylabel('count')
plt.show()

In [None]:
from sklearn.linear_model import Ridge
# Ridge regression estimator with regularization strength alpha=1 and a cap of
# 1000 solver iterations.
# NOTE(review): no model.fit(...) call is visible in this notebook, so
# model.predict() is expected to fail until the model is trained — confirm.
model =  Ridge(max_iter=1000,  alpha=1)

In [None]:
def crime(
    RECORD_NUMBER, DATE_REPORTED, DATE_OCCURRENCE, TIME_OF_OCCURRENCE,
    AREA, AREA_NAME, RPT_DIST_NO, PART_1_2, CRIME_CODE, CRIME_CD_DESC,
    MOCODES, VICTIM_AGE, VICTIM_SEX, WEAPON_USED_CODE, WEAPON_DESC,
    STATUS, STATUS_DESC, LOCATION, CROSS_STREET, LAT, LON,
):
    """
    Run the module-level `model` on a single record and return its prediction.

    Only the first ten arguments (RECORD_NUMBER through CRIME_CD_DESC) are fed
    to the model, as one row of shape (1, 10); the remaining arguments are
    accepted but not used.

    NOTE(review): AREA_NAME and CRIME_CD_DESC look like text fields; mixing
    them into the array would make it non-numeric — confirm the intended
    feature set.
    """
    feature_values = [
        RECORD_NUMBER, DATE_REPORTED, DATE_OCCURRENCE, TIME_OF_OCCURRENCE,
        AREA, AREA_NAME, RPT_DIST_NO, PART_1_2, CRIME_CODE, CRIME_CD_DESC,
    ]
    # A single sample must be presented to predict() as a 2-D row vector.
    single_row = np.array(feature_values).reshape(1, -1)
    return model.predict(single_row)

In [None]:
# Output component. `gr.outputs.*` is deprecated; components are taken from
# the top-level gradio namespace instead.
outputs = gr.Textbox()

# The interface must declare exactly one input per positional parameter of
# `crime`; declaring fewer leaves the function uncallable from the UI.
n_inputs = crime.__code__.co_argcount

app = gr.Interface(
    fn=crime,
    inputs=['number'] * n_inputs,
    outputs=outputs,
    description="This is a crime prediction model",
)

  "Usage of gradio.outputs is deprecated, and will not be supported in the future, please import your components from gradio.components",
  f"Expected {max_args} arguments for function {fn}, received {arg_count}."
  f"Expected at least {min_args} arguments for function {fn}, received {arg_count}."


In [None]:
# Start the Gradio app so the interface is served from this notebook.
app.launch()

Colab notebook detected. To show errors in colab notebook, set `debug=True` in `launch()`
Note: opening Chrome Inspector may crash demo inside Colab notebooks.

To create a public link, set `share=True` in `launch()`.


<IPython.core.display.Javascript object>



In [None]:
pip install gradio


Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting gradio
  Downloading gradio-3.12.0-py3-none-any.whl (11.6 MB)
[K     |████████████████████████████████| 11.6 MB 5.8 MB/s 
[?25hCollecting orjson
  Downloading orjson-3.8.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (278 kB)
[K     |████████████████████████████████| 278 kB 54.3 MB/s 
Collecting markdown-it-py[linkify,plugins]
  Downloading markdown_it_py-2.1.0-py3-none-any.whl (84 kB)
[K     |████████████████████████████████| 84 kB 3.9 MB/s 
[?25hCollecting httpx
  Downloading httpx-0.23.1-py3-none-any.whl (84 kB)
[K     |████████████████████████████████| 84 kB 4.3 MB/s 
Collecting fastapi
  Downloading fastapi-0.88.0-py3-none-any.whl (55 kB)
[K     |████████████████████████████████| 55 kB 4.1 MB/s 
[?25hCollecting h11<0.13,>=0.11
  Downloading h11-0.12.0-py3-none-any.whl (54 kB)
[K     |████████████████████████████████| 54 kB 3.1 MB/s 
[?25hCollecting p