# Extração

In [None]:
import pandas as pd

# Download the CSV export hosted on data.lacity.org straight into a DataFrame.
# NOTE: this needs network access and re-downloads the file on every run.
url = "https://data.lacity.org/api/views/2nrs-mtv8/rows.csv"
dados = pd.read_csv(url)

# Transformação

In [None]:
'''
para reduzir para uma amostra com 90% de nível de confiança com 1% de margem
erro, o tamanho da amostra calculada
(site: https://www.qualtrics.com/blog/calculating-sample-size/ ) é:
'''
# Sample size for a 90% confidence level with a 1% margin of error, obtained
# from the sample-size calculator linked in the note above.
tamanho_amostra = 6707

In [None]:
import random

# Draw a random sample of `tamanho_amostra` rows from the original DataFrame;
# the fixed random_state makes the draw reproducible.
dados_sample = dados.sample(n=tamanho_amostra, random_state=42)

In [None]:
# Columns removed from the sample before the transformation steps.
colunas_para_remover = ['DR_NO', 'Date Rptd', 'DATE OCC', 'TIME OCC', 'AREA',
                        'Rpt Dist No', 'Part 1-2', 'Crm Cd', 'Mocodes',
                        'Vict Descent', 'Premis Cd', 'Weapon Used Cd', 'Status',
                        'Status Desc', 'Crm Cd 1', 'Crm Cd 2', 'Crm Cd 3',
                        'Crm Cd 4', 'LOCATION', 'Cross Street', 'LAT', 'LON',
                        'Premis Desc', 'Weapon Desc']

# Renumber the sampled rows from 0 and drop the listed columns.
# The result is re-assigned instead of mutated in place, and errors='ignore'
# skips columns that are already gone, so re-running this cell no longer
# raises KeyError.
dados_sample = (
    dados_sample
    .reset_index(drop=True)
    .drop(columns=colunas_para_remover, errors='ignore')
)

In [None]:
# Display the sample after the column removal.
dados_sample

Unnamed: 0,AREA NAME,Crm Cd Desc,Vict Age,Vict Sex
0,Newton,"VANDALISM - FELONY ($400 & OVER, ALL CHURCH VA...",42,M
1,Southwest,THEFT FROM MOTOR VEHICLE - PETTY ($950 & UNDER),0,
2,Hollenbeck,"ASSAULT WITH DEADLY WEAPON, AGGRAVATED ASSAULT",65,M
3,Foothill,BURGLARY FROM VEHICLE,27,M
4,Mission,THEFT FROM MOTOR VEHICLE - GRAND ($950.01 AND ...,50,M
...,...,...,...,...
6702,Harbor,BURGLARY FROM VEHICLE,34,F
6703,Southeast,THEFT OF IDENTITY,28,F
6704,Harbor,CONTEMPT OF COURT,0,X
6705,West Valley,INTIMATE PARTNER - SIMPLE ASSAULT,25,F


In [None]:
# Cast the victim age column to int.
# NOTE: astype(int) raises if the column holds missing values.
dados_sample['Vict Age'] = dados_sample['Vict Age'].astype(int)

# Maps an age in years to its age-range label
def determinar_faixa_etaria(age):
    """Return the age-range label for an age given in years.

    Parameters
    ----------
    age : int or float
        Age in years.

    Returns
    -------
    str or None
        'child' (1-12), 'teenager' (13-19), 'adult' (20-64) or 'elder' (65+).
        None when the age is missing (None/NaN) or not positive.

    Notes
    -----
    Non-positive ages used to be labelled 'child' and NaN fell through to
    'elder'. In the sampled data, age 0 shows up together with an unknown
    victim sex, so it looks like a placeholder for "age not recorded" rather
    than a real age -- TODO confirm against the dataset's data dictionary.
    Returning None keeps those rows out of the age-based recommendations.
    """
    # NaN is the only value that is not equal to itself.
    if age is None or age != age or age <= 0:
        return None
    if age < 13:
        return 'child'
    if age < 20:
        return 'teenager'
    if age < 65:
        return 'adult'
    return 'elder'

# Derive the 'Age Range' column from 'Vict Age', one label per row
dados_sample['Age Range'] = dados_sample['Vict Age'].map(determinar_faixa_etaria)

# Show the resulting DataFrame
dados_sample

Unnamed: 0,AREA NAME,Crm Cd Desc,Vict Age,Vict Sex,Age Range
0,Newton,"VANDALISM - FELONY ($400 & OVER, ALL CHURCH VA...",42,M,adult
1,Southwest,THEFT FROM MOTOR VEHICLE - PETTY ($950 & UNDER),0,,child
2,Hollenbeck,"ASSAULT WITH DEADLY WEAPON, AGGRAVATED ASSAULT",65,M,elder
3,Foothill,BURGLARY FROM VEHICLE,27,M,adult
4,Mission,THEFT FROM MOTOR VEHICLE - GRAND ($950.01 AND ...,50,M,adult
...,...,...,...,...,...
6702,Harbor,BURGLARY FROM VEHICLE,34,F,adult
6703,Southeast,THEFT OF IDENTITY,28,F,adult
6704,Harbor,CONTEMPT OF COURT,0,X,child
6705,West Valley,INTIMATE PARTNER - SIMPLE ASSAULT,25,F,adult


In [None]:
# Alphabetical list of the distinct area names present in the sample
areas = sorted(dados_sample['AREA NAME'].drop_duplicates())
areas

['77th Street',
 'Central',
 'Devonshire',
 'Foothill',
 'Harbor',
 'Hollenbeck',
 'Hollywood',
 'Mission',
 'N Hollywood',
 'Newton',
 'Northeast',
 'Olympic',
 'Pacific',
 'Rampart',
 'Southeast',
 'Southwest',
 'Topanga',
 'Van Nuys',
 'West LA',
 'West Valley',
 'Wilshire']

In [None]:
# Alphabetical list of the distinct crime descriptions present in the sample
crime_type = sorted(dados_sample['Crm Cd Desc'].drop_duplicates())
crime_type

['ARSON',
 'ASSAULT WITH DEADLY WEAPON ON POLICE OFFICER',
 'ASSAULT WITH DEADLY WEAPON, AGGRAVATED ASSAULT',
 'ATTEMPTED ROBBERY',
 'BATTERY - SIMPLE ASSAULT',
 'BATTERY ON A FIREFIGHTER',
 'BATTERY POLICE (SIMPLE)',
 'BATTERY WITH SEXUAL CONTACT',
 'BIKE - STOLEN',
 'BOAT - STOLEN',
 'BOMB SCARE',
 'BRANDISH WEAPON',
 'BUNCO, ATTEMPT',
 'BUNCO, GRAND THEFT',
 'BUNCO, PETTY THEFT',
 'BURGLARY',
 'BURGLARY FROM VEHICLE',
 'BURGLARY FROM VEHICLE, ATTEMPTED',
 'BURGLARY, ATTEMPTED',
 'CHILD ABUSE (PHYSICAL) - AGGRAVATED ASSAULT',
 'CHILD ABUSE (PHYSICAL) - SIMPLE ASSAULT',
 'CHILD ANNOYING (17YRS & UNDER)',
 'CHILD NEGLECT (SEE 300 W.I.C.)',
 'CHILD PORNOGRAPHY',
 'CHILD STEALING',
 'CONTEMPT OF COURT',
 'CONTRIBUTING',
 'COUNTERFEIT',
 'CRIMINAL HOMICIDE',
 'CRIMINAL THREATS - NO WEAPON DISPLAYED',
 'CRM AGNST CHLD (13 OR UNDER) (14-15 & SUSP 10 YRS OLDER)',
 'CRUELTY TO ANIMALS',
 'DEFRAUDING INNKEEPER/THEFT OF SERVICES, $950 & UNDER',
 'DISCHARGE FIREARMS/SHOTS FIRED',
 'DISTURBING TH

In [None]:
# Distinct age-range labels found in the sample
ages_range = dados_sample['Age Range'].unique()
ages_range

array(['adult', 'child', 'elder', 'teenager'], dtype=object)

In [None]:
# Distinct victim-sex codes found in the sample
victim_gender = dados_sample['Vict Sex'].unique()
victim_gender

array(['X', nan, 'M', 'F'], dtype=object)

In [None]:
# Translate the victim-sex codes in a single pass:
#   'X'        -> 'NA'
#   'F'        -> 'Woman'
#   'M' / 'H'  -> 'Man'
rotulos_sexo = {'X': 'NA', 'F': 'Woman', 'M': 'Man', 'H': 'Man'}
dados_sample['Vict Sex'] = dados_sample['Vict Sex'].replace(rotulos_sexo)

In [None]:
import re

# Ordered (pattern, category) rules: the first pattern found in the
# description decides the category, so the order below is significant.
_REGRAS_CATEGORIA = (
    (re.compile(r'\bSEXUAL\b|\bRAPE\b|\bMOLESTATION\b'), 'Sexual Crime'),
    (re.compile(r'\bVIOLENCE\b|\bASSAULT\b|\bHOMICIDE\b'), 'Violence'),
    (re.compile(r'\bROBBERY\b'), 'Robbery'),
    (re.compile(r'\bBURGLARY\b|\bTHEFT\b|\bVANDALISM\b'), 'Crime Against Property'),
)


def classificar_categoria(descricao):
    """Map a crime description to a coarse crime category.

    Parameters
    ----------
    descricao : str
        Crime description; matching is case-insensitive and keyword based.

    Returns
    -------
    str
        'Sexual Crime', 'Violence', 'Robbery' or 'Crime Against Property'
        for the first rule whose keyword appears as a whole word, otherwise
        'Outros'. Non-string input (e.g. NaN for a missing description) is
        also classified as 'Outros' instead of raising AttributeError.
    """
    if not isinstance(descricao, str):
        return 'Outros'

    descricao = descricao.upper()
    for padrao, categoria in _REGRAS_CATEGORIA:
        if padrao.search(descricao):
            return categoria
    return 'Outros'

# Add the 'Categoria' column by classifying every crime description
dados_sample['Categoria'] = dados_sample['Crm Cd Desc'].map(classificar_categoria)

# Show the result
dados_sample

Unnamed: 0,AREA NAME,Crm Cd Desc,Vict Age,Vict Sex,Age Range,Categoria
0,Newton,"VANDALISM - FELONY ($400 & OVER, ALL CHURCH VA...",42,Man,adult,Crime Against Property
1,Southwest,THEFT FROM MOTOR VEHICLE - PETTY ($950 & UNDER),0,,child,Crime Against Property
2,Hollenbeck,"ASSAULT WITH DEADLY WEAPON, AGGRAVATED ASSAULT",65,Man,elder,Violence
3,Foothill,BURGLARY FROM VEHICLE,27,Man,adult,Crime Against Property
4,Mission,THEFT FROM MOTOR VEHICLE - GRAND ($950.01 AND ...,50,Man,adult,Crime Against Property
...,...,...,...,...,...,...
6702,Harbor,BURGLARY FROM VEHICLE,34,Woman,adult,Crime Against Property
6703,Southeast,THEFT OF IDENTITY,28,Woman,adult,Crime Against Property
6704,Harbor,CONTEMPT OF COURT,0,,child,Outros
6705,West Valley,INTIMATE PARTNER - SIMPLE ASSAULT,25,Woman,adult,Violence


# Load

In [None]:
# Builds the personalised recommendation sentence for one record
def generate_recommendation(row):
    """Return the safety recommendation for one record, or "" if it is incomplete.

    Parameters
    ----------
    row : mapping (e.g. a DataFrame row)
        Must provide the keys 'Vict Sex', 'Age Range', 'AREA NAME' and
        'Crm Cd Desc'.

    Returns
    -------
    str
        The recommendation sentence, or an empty string when any of the four
        fields is missing or empty. Previously a missing crime description
        raised AttributeError, a missing area produced "in nan", and an empty
        age range raised IndexError.
    """
    # Local names deliberately differ from the notebook-level variables
    # (victim_gender, ages_range, areas, crime_type) to avoid shadowing them.
    gender = row['Vict Sex']
    age_range = row['Age Range']
    area = row['AREA NAME']
    crime = row['Crm Cd Desc']

    for value in (gender, age_range, area, crime):
        if pd.isnull(value) or value == '':
            return ""

    # "an" before a vowel sound ('adult', 'elder'), "a" otherwise.
    article = 'an' if age_range[0].lower() in ['a', 'e', 'i', 'o', 'u'] else 'a'
    return f"Hey, if you are {article} {age_range} {gender.lower()} and you are in {area}, stay safe and be vigilant against {crime.lower()}."

# Apply the function row by row (axis=1) and store the sentence in a new column
dados_sample['Recommendation'] = dados_sample.apply(generate_recommendation, axis=1)

In [None]:
# Collect one record per row that actually received a recommendation
# (rows whose recommendation is an empty string are skipped).
advices_list = [
    {
        'Gender': row['Vict Sex'],
        'Age Range': row['Age Range'],
        'Area': row['AREA NAME'],
        'Categoria': row['Categoria'],
        'Recommendation': row['Recommendation'],
    }
    for _, row in dados_sample.iterrows()
    if row['Recommendation']
]

# Build the 'advices' DataFrame in one call. The previous version created a
# one-row DataFrame per record and concatenated them all, which is slow and
# raises when the list is empty. The explicit column list keeps the schema
# stable even when there are no records.
advices = pd.DataFrame(
    advices_list,
    columns=['Gender', 'Age Range', 'Area', 'Categoria', 'Recommendation'],
)

# Merge the recommendations of each (gender, age range, area, category) group
# into a single newline-separated text.
advices_condensed = (
    advices
    .groupby(['Gender', 'Age Range', 'Area', 'Categoria'])['Recommendation']
    .agg('\n'.join)
    .reset_index()
)

# Show the condensed table
advices_condensed

Unnamed: 0,Gender,Age Range,Area,Categoria,Recommendation
0,Man,adult,77th Street,Crime Against Property,"Hey, if you are an adult man and you are in 77..."
1,Man,adult,77th Street,Outros,"Hey, if you are an adult man and you are in 77..."
2,Man,adult,77th Street,Robbery,"Hey, if you are an adult man and you are in 77..."
3,Man,adult,77th Street,Violence,"Hey, if you are an adult man and you are in 77..."
4,Man,adult,Central,Crime Against Property,"Hey, if you are an adult man and you are in Ce..."
...,...,...,...,...,...
577,Woman,teenager,West Valley,Crime Against Property,"Hey, if you are a teenager woman and you are i..."
578,Woman,teenager,West Valley,Robbery,"Hey, if you are a teenager woman and you are i..."
579,Woman,teenager,West Valley,Violence,"Hey, if you are a teenager woman and you are i..."
580,Woman,teenager,Wilshire,Crime Against Property,"Hey, if you are a teenager woman and you are i..."


In [None]:
# Create the empty 'users' DataFrame with every column that add_user fills in.
# 'Gender' is declared last because add_user stores it but the schema used to
# omit it; placing it at the end keeps the column order identical to what
# appending a record with the extra key produced before.
users = pd.DataFrame(columns=['Id', 'Name', 'Age', 'Area', 'Date', 'Gender'])

# Returns the current local date and time as text
def get_current_date():
    """Return the current local date and time formatted as 'YYYY-MM-DD HH:MM:SS'."""
    # Imported here because no visible cell imports `datetime`; without it the
    # function raises NameError on a freshly restarted kernel.
    import datetime

    return datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")

# Registers a new user in the 'users' DataFrame and prints a personalised tip
def add_user(name, age, area, gender=None):
    """Register a user and print a safety message for the area they are heading to.

    Parameters
    ----------
    name : str
        User name, used in the greeting and stored in 'users'.
    age : int
        Age in years; converted to an age range with determinar_faixa_etaria.
    area : str
        Destination area. It is matched against advices_condensed['Area']
        ignoring case and surrounding whitespace.
    gender : str, optional
        Gender as typed by the user; only stored, not used for filtering.
        It used to be read from a global variable of the same name, which
        made the function fail or record stale data when called on its own.

    Side effects
    ------------
    Rebinds the global 'users' DataFrame with the new row and prints the message.
    """
    global users

    # Age range derived from the age supplied by the user
    age_range = determinar_faixa_etaria(age)

    # NOTE(review): get_greeting() is not defined in the visible part of the
    # notebook; it has to be defined before this cell runs.
    greeting = get_greeting()

    # Opening of the personalised message
    message = f"{greeting}, {name}! Considering that you are heading to {area},"

    # Rows of 'advices_condensed' for this area and age range
    area_key = area.strip().casefold()
    filtered_data = advices_condensed[
        (advices_condensed['Area'].str.casefold() == area_key)
        & (advices_condensed['Age Range'] == age_range)
    ]

    if not filtered_data.empty:
        # Each condensed row holds its recommendations joined by '\n', so the
        # number of underlying records is (newline count + 1). Summing that per
        # category gives the real frequency; taking the mode of 'Categoria'
        # only counted how many gender groups mention a category.
        crimes_per_category = (
            filtered_data['Recommendation'].str.count('\n').add(1)
            .groupby(filtered_data['Categoria'])
            .sum()
        )
        most_common_crime = crimes_per_category.idxmax()
        message += f"\nwe suggest you stay vigilant against {most_common_crime.lower()}."
    else:
        message += "\nwe couldn't find specific crime information for your area and age range."

    message += "\nWishing you a safe journey!"

    # Auto-incrementing Id: one more than the number of stored users
    record = {
        'Id': len(users) + 1,
        'Name': name,
        'Age': age,
        'Area': area,
        'Date': get_current_date(),
        'Gender': gender,
    }

    # DataFrame.append was removed in pandas 2.0. Build the first row directly
    # (concatenating with an empty frame triggers a FutureWarning) and use
    # pd.concat for the following ones.
    if users.empty:
        columns = list(users.columns) + [key for key in record if key not in users.columns]
        users = pd.DataFrame([record], columns=columns)
    else:
        users = pd.concat([users, pd.DataFrame([record])], ignore_index=True)

    # Show the personalised message
    print(message)

# Ask for user data until 'exit' is typed as the name
while True:
    name = input("Digite seu nome (ou 'exit' para sair): ")
    if name.strip().lower() == 'exit':
        break
    try:
        age = int(input("Digite sua idade em anos: "))
    except ValueError:
        # A non-numeric age used to abort the whole loop with a traceback.
        print("Idade inválida: digite um número inteiro.")
        continue
    gender = input("Digite seu sexo (M/F/X): ")
    area = input("Digite a área para onde está indo (por exemplo, 'West LA', 'Topanga', etc.): ")

    # Store the user in 'users' and print the personalised message
    add_user(name, age, area, gender=gender)


Digite seu nome (ou 'exit' para sair): Aaron
Digite sua idade em anos: 18
Digite seu sexo (M/F/X): M
Digite a área para onde está indo (por exemplo, 'West LA', 'Topanga', etc.): 77


  users = users.append({


Good evening, Aaron! Considering that you are heading to 77,
we couldn't find specific crime information for your area and age range.
Wishing you a safe journey!
Digite seu nome (ou 'exit' para sair): exit


In [None]:
# Show the complete 'users' DataFrame; a bare last expression uses the
# notebook's rich table display instead of print()'s plain text.
users

  Id   Name Age Area                 Date Gender
0  1  Aaron  18   77  2023-09-28 03:33:01      M
