In [566]:
import os
from urllib.parse import quote

import pandas as pd
from dotenv import load_dotenv
from sqlalchemy import create_engine
load_dotenv()

True

# Carga de las tablas

In [567]:
# Read the MySQL connection settings from environment variables, so that no
# credential is written in the notebook itself (os.getenv yields None when a
# variable is not set).
host, database, user, port, password = (
    os.getenv(setting_name)
    for setting_name in ('host', 'database', 'user', 'port', 'password')
)

In [568]:
# Create the SQLAlchemy engine used to query the MySQL database.
# Fail early if a setting is missing: otherwise the literal text "None" would be
# interpolated into the connection URL and only surface later as an obscure
# connection error.
_connection_settings = {'host': host, 'database': database, 'user': user, 'port': port, 'password': password}
_missing_settings = [name for name, value in _connection_settings.items() if value is None]
if _missing_settings:
    raise RuntimeError(
        'Missing database connection settings (environment / .env file): '
        + ', '.join(_missing_settings)
    )

# Percent-encode the credentials so that characters such as '@', ':' or '/' in
# the user name or password are not misread as URL delimiters.
_url_user = quote(user, safe='')
_url_password = quote(password, safe='')
engine = create_engine(f'mysql+mysqlconnector://{_url_user}:{_url_password}@{host}:{port}/{database}')

In [569]:
def _read_table(table_name):
    """Return all rows of final_project.<table_name>, every column read as object dtype."""
    return pd.read_sql(f'SELECT * from final_project.{table_name}', con=engine, dtype='object')


# Load each cleaned MySQL table into its own DataFrame
accident = _read_table('accident')
distract = _read_table('distract')
drugs = _read_table('drugs')
maneuver = _read_table('maneuver')
person = _read_table('person')
vehicle = _read_table('vehicle')
weather = _read_table('weather')

In [570]:
# Display the 'person' table as loaded from MySQL (one row per person record)
person

Unnamed: 0,ID,ST_CASE,VEH_NO,PER_NO,AGE,SEX,TYPE_OF_PERSON,TYPE_OF_PERSONNAME,INJ_SEV,INJ_SEVNAME,SEAT_POS,SEAT_POSNAME,REST_MIS,REST_MISNAME,HELM_MIS,HELM_MISNAME,DRINKING,DRINKINGNAME
0,1,10001,1,1,37,Male,1,Driver,4,Fatal Injury,11,Front left,7,None Used/Not Applicable,7,None Used/Not Applicable,0,No (Alcohol Not Involved)
1,2,10001,2,1,58,Female,1,Driver,0,No Apparent Injury,11,Front left,0,No Indication of Misuse,7,None Used/Not Applicable,0,No (Alcohol Not Involved)
2,3,10001,2,2,60,Female,2,Passenger,0,No Apparent Injury,13,Front right,0,No Indication of Misuse,7,None Used/Not Applicable,8,Not Reported
3,4,10002,1,1,57,Female,1,Driver,4,Fatal Injury,11,Front left,0,No Indication of Misuse,7,None Used/Not Applicable,9,Reported as Unknown
4,5,10002,2,1,55,Male,1,Driver,4,Fatal Injury,11,Front left,0,No Indication of Misuse,7,None Used/Not Applicable,9,Reported as Unknown
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95752,95753,560117,1,1,43,Male,1,Driver,4,Fatal Injury,11,Front left,7,None Used/Not Applicable,7,None Used/Not Applicable,1,Yes (Alcohol Involved)
95753,95754,560118,1,1,30,Male,1,Driver,0,No Apparent Injury,11,Front left,7,None Used/Not Applicable,7,None Used/Not Applicable,1,Yes (Alcohol Involved)
95754,95755,560118,1,2,43,Female,2,Passenger,4,Fatal Injury,23,Back right,7,None Used/Not Applicable,7,None Used/Not Applicable,8,Not Reported
95755,95756,560118,1,3,30,Male,2,Passenger,3,Suspected Serious Injury,21,Back left,7,None Used/Not Applicable,7,None Used/Not Applicable,8,Not Reported


# Creación del dataframe final

## EDA y unión de tablas

In [571]:
# Remove the columns of 'person' that are not needed; only the descriptive
# *NAME variants of these fields are kept.
columns_to_drop = [
    'ID',
    'TYPE_OF_PERSON',
    'INJ_SEV',
    'SEAT_POS',
    'REST_MIS',
    'HELM_MIS',
    'DRINKING',
]
person = person.drop(labels=columns_to_drop, axis='columns')

In [572]:
# One-hot encode the categorical columns of 'person' (SEX and the *NAME fields):
# each category becomes its own indicator column named '<column>_<category>'.
person = pd.get_dummies(
    data=person,
    columns=[
        'SEX',
        'TYPE_OF_PERSONNAME',
        'INJ_SEVNAME',
        'SEAT_POSNAME',
        'REST_MISNAME',
        'HELM_MISNAME',
        'DRINKINGNAME',
    ],
)

In [573]:
# Aggregation helper: adds up the values of a column; on a boolean indicator
# column the result is the number of True entries.
def true_sum(series):
    """Return the sum of `series` (for boolean data, the count of True values)."""
    total = series.sum()
    return total

In [574]:
# Collapse 'person' to one row per crash (ST_CASE).
# Indicator columns are summed with true_sum, giving per-crash counts per category.
person_indicator_columns = [
    'SEX_Female',
    'SEX_Male',
    'SEX_Unknown',
    'TYPE_OF_PERSONNAME_Bicyclist',
    'TYPE_OF_PERSONNAME_Bicyclist or similar',
    'TYPE_OF_PERSONNAME_Driver',
    'TYPE_OF_PERSONNAME_Occupant parked vehicle',
    'TYPE_OF_PERSONNAME_Passenger',
    'TYPE_OF_PERSONNAME_Pedestrian',
    'TYPE_OF_PERSONNAME_Person in a building',
    'TYPE_OF_PERSONNAME_Unknown',
    'INJ_SEVNAME_Died Prior to Crash',
    'INJ_SEVNAME_Fatal Injury',
    'INJ_SEVNAME_Injured, Severity Unknown',
    'INJ_SEVNAME_No Apparent Injury',
    'INJ_SEVNAME_Possible Injury',
    'INJ_SEVNAME_Suspected Minor Injury',
    'INJ_SEVNAME_Suspected Serious Injury',
    'INJ_SEVNAME_Unknown/Not Reported',
    'SEAT_POSNAME_Back',
    'SEAT_POSNAME_Back left',
    'SEAT_POSNAME_Back middle',
    'SEAT_POSNAME_Back right',
    'SEAT_POSNAME_Front',
    'SEAT_POSNAME_Front left',
    'SEAT_POSNAME_Front middle',
    'SEAT_POSNAME_Front right',
    'SEAT_POSNAME_Not regular vehicle',
    'SEAT_POSNAME_Unknown',
    'REST_MISNAME_No Indication of Misuse',
    'REST_MISNAME_None Used/Not Applicable',
    'REST_MISNAME_Not a Motor Vehicle Occupant',
    'REST_MISNAME_Yes, Indication of Misuse',
    'HELM_MISNAME_No Indication of Misuse',
    'HELM_MISNAME_None Used/Not Applicable',
    'HELM_MISNAME_Not a Motor Vehicle Occupant',
    'HELM_MISNAME_Yes, Indication of Misuse',
    'DRINKINGNAME_No (Alcohol Not Involved)',
    'DRINKINGNAME_Not Reported',
    'DRINKINGNAME_Reported as Unknown',
    'DRINKINGNAME_Yes (Alcohol Involved)',
]
person_aggregations = {
    'VEH_NO': 'nunique',  # distinct vehicle numbers in the crash
    'PER_NO': 'count',    # non-null person records in the crash
    'AGE': 'mean',
    **dict.fromkeys(person_indicator_columns, true_sum),
}
person = person.groupby('ST_CASE').agg(person_aggregations)

In [575]:
# Remove the columns of 'vehicle' that are not needed
columns_to_drop = [
    'ID',
    'VEHICLE_TYPE',
    'VEHICLE_MANUFACTURER',
    'VEHICLE_MODEL',
    'VEHICLE_MODELNAME',
    'VEHICLE_CLASS',
    'VEHICLE_CLASSNAME',
    'VEHICLE_WEIGHT',
    'BUS_USE',
    'BUS_USENAME',
    'SPEC_USE',
    'SPEC_USENAME',
    'EMER_USE',
    'EMER_USENAME',
    'ROLLOVER',
    'ROLLOVERNAME',
    'ROLINLOC',
    'ROLINLOCNAME',
    'IMPACT1',
    'TOWED',
    'TOWEDNAME',
    'ROAD_CONDITION',
    'ACC_TYPE',
    'DRIVERS_PRESENCE',
    'DRIVERS_PRESENCENAME',
    'LICENSE_COMPLIANCE',
    'LICENSE_RESTRICTION',
    'SPEEDREL',
    'HIT_RUN',
    'VEHICLE_MANUFACTURERNAME',
]
vehicle = vehicle.drop(labels=columns_to_drop, axis='columns')

In [576]:
# One-hot encode the categorical columns of 'vehicle': each category becomes
# its own indicator column named '<column>_<category>'.
vehicle = pd.get_dummies(
    data=vehicle,
    columns=[
        'VEHICLE_TYPENAME',
        'VEHICLE_WEIGHTNAME',
        'IMPACT1NAME',
        'ROAD_CONDITIONNAME',
        'ACC_TYPENAME',
        'LICENSE_COMPLIANCENAME',
        'LICENSE_RESTRICTIONNAME',
        'SPEEDRELNAME',
        'HIT_RUNNAME',
    ],
)

In [577]:
# Keep only the vehicles whose model year lies in 1900..2022 (both ends included).
# NOTE(review): this removes the whole vehicle row, so such vehicles also vanish
# from the per-crash counts computed later — confirm that this is intended.
vehicle = vehicle[vehicle['MOD_YEAR'].between(1900, 2022)]

In [578]:
# Collapse 'vehicle' to one row per crash (ST_CASE).
# Indicator columns are summed with true_sum, giving per-crash counts per category.
vehicle_indicator_columns = [
    'VEHICLE_TYPENAME_2-door sedan',
    'VEHICLE_TYPENAME_All-Terrain Cycle',
    'VEHICLE_TYPENAME_Body type',
    'VEHICLE_TYPENAME_Bus',
    'VEHICLE_TYPENAME_Cab Chassis Based',
    'VEHICLE_TYPENAME_Compact Utility',
    'VEHICLE_TYPENAME_Convertible',
    'VEHICLE_TYPENAME_Coupe',
    'VEHICLE_TYPENAME_Golf Cart',
    'VEHICLE_TYPENAME_Large utility',
    'VEHICLE_TYPENAME_Low Speed Vehicle',
    'VEHICLE_TYPENAME_Minivan',
    'VEHICLE_TYPENAME_Moped',
    'VEHICLE_TYPENAME_Motor Scooter',
    'VEHICLE_TYPENAME_Motor home',
    'VEHICLE_TYPENAME_Motorcycle',
    'VEHICLE_TYPENAME_Motored cycle type',
    'VEHICLE_TYPENAME_Not Reported',
    'VEHICLE_TYPENAME_Pickup',
    'VEHICLE_TYPENAME_Recreational Off-Highway Vehicle',
    'VEHICLE_TYPENAME_School Bus',
    'VEHICLE_TYPENAME_Sedan',
    'VEHICLE_TYPENAME_Snowmobile',
    'VEHICLE_TYPENAME_Solar electric',
    'VEHICLE_TYPENAME_Station Wagon',
    'VEHICLE_TYPENAME_Truck',
    'VEHICLE_TYPENAME_Truck-tractor',
    'VEHICLE_TYPENAME_Utility vehicle',
    'VEHICLE_TYPENAME_Van',
    'VEHICLE_TYPENAME_Van-Based Bus GVWR',
    'VEHICLE_WEIGHTNAME_11,794 - 14,969 kg',
    'VEHICLE_WEIGHTNAME_14,969 kg and above',
    'VEHICLE_WEIGHTNAME_2,722 - 4,536 kg',
    'VEHICLE_WEIGHTNAME_2,722 kg or less',
    'VEHICLE_WEIGHTNAME_4,536 - 6,350 kg',
    'VEHICLE_WEIGHTNAME_6,350 - 7,258 kg',
    'VEHICLE_WEIGHTNAME_7,258 - 8,845 kg',
    'VEHICLE_WEIGHTNAME_8,845 - 11,794 kg',
    'VEHICLE_WEIGHTNAME_Unknown',
    'IMPACT1NAME_Back',
    'IMPACT1NAME_Front',
    'IMPACT1NAME_Left',
    'IMPACT1NAME_Left-Back',
    'IMPACT1NAME_Left-Front',
    'IMPACT1NAME_Non-Collision',
    'IMPACT1NAME_Right',
    'IMPACT1NAME_Right-Back',
    'IMPACT1NAME_Right-Front',
    'IMPACT1NAME_Top',
    'IMPACT1NAME_Undercarriage',
    'IMPACT1NAME_Unknown',
    'IMPACT1NAME_Vehicle, Objects or Person Set-in-motion',
    'ROAD_CONDITIONNAME_Dry',
    'ROAD_CONDITIONNAME_Ice',
    'ROAD_CONDITIONNAME_Mud, Dirt or Gravel',
    'ROAD_CONDITIONNAME_Non-Trafficway',
    'ROAD_CONDITIONNAME_Oil',
    'ROAD_CONDITIONNAME_Snow',
    'ROAD_CONDITIONNAME_Unknown',
    'ROAD_CONDITIONNAME_Water',
    'ROAD_CONDITIONNAME_Wet',
    'ACC_TYPENAME_Avoid Collision With Object',
    'ACC_TYPENAME_Avoid Collision With Veh., Ped., Anim.',
    'ACC_TYPENAME_Avoid Collision With Vehicle',
    'ACC_TYPENAME_Avoid Collision with Vehicle',
    'ACC_TYPENAME_Backing Veh.',
    'ACC_TYPENAME_Changing Lanes to the Left',
    'ACC_TYPENAME_Changing Lanes to the Right',
    'ACC_TYPENAME_Control/Traction Loss',
    'ACC_TYPENAME_Decelerating (Slowing)',
    'ACC_TYPENAME_Decelerating (Slowing), Going Left',
    'ACC_TYPENAME_Decelerating (Slowing), Going Right',
    'ACC_TYPENAME_Decelerating (Slowing), Going Straight',
    'ACC_TYPENAME_Drive Off Road',
    'ACC_TYPENAME_End Departure',
    'ACC_TYPENAME_Initial Opposite Directions (Going Straight)',
    'ACC_TYPENAME_Initial Opposite Directions (Left/Right)',
    'ACC_TYPENAME_Initial Same Directions (Going Straight)',
    'ACC_TYPENAME_Initial Same Directions (Turning Left)',
    'ACC_TYPENAME_Initial Same Directions (Turning Right)',
    'ACC_TYPENAME_Lateral Move (Going Straight)',
    'ACC_TYPENAME_Lateral Move (Left/Right)',
    'ACC_TYPENAME_No Impact',
    'ACC_TYPENAME_Other Crash Type',
    'ACC_TYPENAME_Other Vehicle',
    'ACC_TYPENAME_Parked Veh.',
    'ACC_TYPENAME_Pedestrian/ Animal',
    'ACC_TYPENAME_Slower',
    'ACC_TYPENAME_Slower, Going Left',
    'ACC_TYPENAME_Slower, Going Right',
    'ACC_TYPENAME_Slower, Going Straight',
    'ACC_TYPENAME_Specifics Other',
    'ACC_TYPENAME_Specifics Unknown',
    'ACC_TYPENAME_Sta. Object',
    'ACC_TYPENAME_Stopped',
    'ACC_TYPENAME_Straight Ahead on Left',
    'ACC_TYPENAME_Straight Ahead on Left/Right',
    'ACC_TYPENAME_Striking from the Left',
    'ACC_TYPENAME_Striking from the Right',
    'ACC_TYPENAME_Struck on the Right',
    'ACC_TYPENAME_Struck on the left',
    'ACC_TYPENAME_Turn Into Opposite Directions (Going Straight)',
    'ACC_TYPENAME_Turn Into Opposite Directions (Turning Left)',
    'ACC_TYPENAME_Turn Into Opposite Directions (Turning Right)',
    'ACC_TYPENAME_Turn Into Same Direction (Going Straight)',
    'ACC_TYPENAME_Turn Into Same Direction (Turning Left)',
    'ACC_TYPENAME_Turn Into Same Direction (Turning Right)',
    'ACC_TYPENAME_Unknown',
    'LICENSE_COMPLIANCENAME_No Driver Present/Unknown if Driver Present',
    'LICENSE_COMPLIANCENAME_No license required for this class vehicle',
    'LICENSE_COMPLIANCENAME_No valid license for this class vehicle',
    'LICENSE_COMPLIANCENAME_Not licensed',
    'LICENSE_COMPLIANCENAME_Unknown',
    'LICENSE_COMPLIANCENAME_Valid license for this class vehicle',
    'LICENSE_RESTRICTIONNAME_No Driver Present/Unknown if Driver Present',
    'LICENSE_RESTRICTIONNAME_No Restrictions or Not Applicable',
    'LICENSE_RESTRICTIONNAME_Restrictions Complied With',
    'LICENSE_RESTRICTIONNAME_Restrictions Not Complied With',
    'LICENSE_RESTRICTIONNAME_Restrictions, Compliance Unknown',
    'LICENSE_RESTRICTIONNAME_Unknown',
    'SPEEDRELNAME_No',
    'SPEEDRELNAME_Unknown',
    'SPEEDRELNAME_Yes',
    'HIT_RUNNAME_No',
    'HIT_RUNNAME_Yes',
]
vehicle_aggregations = {
    'VEH_NO': 'nunique',  # distinct vehicle numbers in the crash
    'MOD_YEAR': 'mean',
    # NOTE(review): 'nunique' yields the number of distinct NUMOCCS values in the
    # crash, not a total or average — confirm this is the intended statistic.
    'NUMOCCS': 'nunique',
    'PREVIOUS_ACCIDENT': 'mean',
    'PREVIOUS_SUSPENSION': 'mean',
    'PREVIOUS_DRIVING_WHILE_INTOXIDATED': 'mean',
    'PREVIOUS_SPEED_CONVICT': 'mean',
    **dict.fromkeys(vehicle_indicator_columns, true_sum),
}
vehicle = vehicle.groupby('ST_CASE').agg(vehicle_aggregations)

In [579]:
# Remove the columns of 'distract' that are not needed
columns_to_drop = [
    'ID',
    'VEH_NO',
    'DRDISTRACT',
]
distract = distract.drop(labels=columns_to_drop, axis='columns')

In [580]:
# One-hot encode DRDISTRACTNAME: each category becomes its own indicator
# column named 'DRDISTRACTNAME_<category>'.
distract = pd.get_dummies(data=distract, columns=['DRDISTRACTNAME'])

In [581]:
# Collapse 'distract' to one row per crash (ST_CASE), summing each indicator column
distract_indicator_columns = [
    'DRDISTRACTNAME_Distraction',
    'DRDISTRACTNAME_Eating/Drinking',
    'DRDISTRACTNAME_Mobile',
    'DRDISTRACTNAME_Not Distracted',
    'DRDISTRACTNAME_Smoking',
    'DRDISTRACTNAME_Unknown',
]
distract = distract.groupby('ST_CASE').agg(dict.fromkeys(distract_indicator_columns, true_sum))

In [582]:
# Remove the columns of 'drugs' that are not needed
columns_to_drop = [
    'ID',
    'VEH_NO',
    'PER_NO',
    'DRUGRES',
]
drugs = drugs.drop(labels=columns_to_drop, axis='columns')

In [583]:
# One-hot encode DRUGRESNAME: each category becomes its own indicator
# column named 'DRUGRESNAME_<category>'.
drugs = pd.get_dummies(data=drugs, columns=['DRUGRESNAME'])

In [584]:
# Collapse 'drugs' to one row per crash (ST_CASE), summing each indicator column
drugs_indicator_columns = [
    'DRUGRESNAME_Anesthetic',
    'DRUGRESNAME_Antidepressant',
    'DRUGRESNAME_Antipsychotic',
    'DRUGRESNAME_Barbiturate',
    'DRUGRESNAME_Depressant',
    'DRUGRESNAME_Negative',
    'DRUGRESNAME_OTC Analgesic',
    'DRUGRESNAME_Opioid',
    'DRUGRESNAME_Other',
    'DRUGRESNAME_Stimulant',
    'DRUGRESNAME_Synthetic Cannabinoid',
    'DRUGRESNAME_Unknown',
]
drugs = drugs.groupby('ST_CASE').agg(dict.fromkeys(drugs_indicator_columns, true_sum))

In [585]:
# Remove the columns of 'maneuver' that are not needed
columns_to_drop = [
    'ID',
    'VEH_NO',
    'MANEUVER',
]
maneuver = maneuver.drop(labels=columns_to_drop, axis='columns')

In [586]:
# One-hot encode MANEUVERNAME: each category becomes its own indicator
# column named 'MANEUVERNAME_<category>'.
maneuver = pd.get_dummies(data=maneuver, columns=['MANEUVERNAME'])

In [587]:
# Collapse 'maneuver' to one row per crash (ST_CASE), summing each indicator column
maneuver_indicator_columns = [
    'MANEUVERNAME_Avoid Obstacle',
    'MANEUVERNAME_Contact Vehicle',
    'MANEUVERNAME_No Maneuver',
    'MANEUVERNAME_Road Condition',
    'MANEUVERNAME_Unknown',
]
maneuver = maneuver.groupby('ST_CASE').agg(dict.fromkeys(maneuver_indicator_columns, true_sum))

In [588]:
# Remove the columns of 'weather' that are not needed
columns_to_drop = [
    'ID',
    'WEATHER',
]
weather = weather.drop(labels=columns_to_drop, axis='columns')

In [589]:
# One-hot encode WEATHERNAME: each category becomes its own indicator
# column named 'WEATHERNAME_<category>'.
weather = pd.get_dummies(data=weather, columns=['WEATHERNAME'])

In [590]:
# Collapse 'weather' to one row per crash (ST_CASE), summing each indicator column
weather_indicator_columns = [
    'WEATHERNAME_Clear',
    'WEATHERNAME_Cloudy',
    'WEATHERNAME_Freezing',
    'WEATHERNAME_Other',
    'WEATHERNAME_Rain',
    'WEATHERNAME_Snow',
    'WEATHERNAME_Unknown',
    'WEATHERNAME_Windy',
]
weather = weather.groupby('ST_CASE').agg(dict.fromkeys(weather_indicator_columns, true_sum))