In [1]:
import requests
import pandas as pd
import numpy as np
import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller

# Función de Extracción
def descargar_y_cargar_datos(url):
    response = requests.get(url)
    local_filename = 'Motor_Vehicle_Registrations.csv'
    with open(local_filename, 'wb') as file:
        file.write(response.content)
    data = pd.read_csv(local_filename)
    return data

# Función de Transformación
def transformar_datos(data):
    data_since_1980 = data[data['year'] >= 1980]
    data_since_1980['year'].replace({2017: 1917}, inplace=True)
    return data_since_1980

# Función de Modelado SARIMA
def predict_vehicle_type_sarima(data, vehicle_type):
    predictions = {}
    for state in data.index:
        state_data = data.loc[state].dropna()
        if len(state_data) > 2:
            diff_state_data = state_data.diff().dropna()
            result_diff = adfuller(diff_state_data)
            if result_diff[1] < 0.05:
                model = sm.tsa.statespace.SARIMAX(state_data, 
                                                  order=(1, 1, 1), 
                                                  seasonal_order=(1, 1, 1, 12),
                                                  enforce_stationarity=False, 
                                                  enforce_invertibility=False)
                results = model.fit(disp=False)
                forecast = results.get_forecast(steps=2).predicted_mean
                predictions[state] = {2019: forecast.iloc[0], 2020: forecast.iloc[1]}
            else:
                predictions[state] = {2019: None, 2020: None}
        else:
            predictions[state] = {2019: None, 2020: None}
    return pd.DataFrame(predictions).T

# Función para Combinar Datos
def combinar_datos(data_since_1980):
    # Crear DataFrames pivotados para cada tipo de vehículo
    pivot_auto = data_since_1980.pivot_table(index='state', columns='year', values='Auto', aggfunc='sum')
    pivot_bus = data_since_1980.pivot_table(index='state', columns='year', values='Bus', aggfunc='sum')
    pivot_truck = data_since_1980.pivot_table(index='state', columns='year', values='Truck', aggfunc='sum')
    pivot_motorcycle = data_since_1980.pivot_table(index='state', columns='year', values='Motorcycle', aggfunc='sum')

    # Realizar predicciones SARIMA para cada tipo de vehículo
    predicted_auto = predict_vehicle_type_sarima(pivot_auto, 'Auto')
    predicted_bus = predict_vehicle_type_sarima(pivot_bus, 'Bus')
    predicted_truck = predict_vehicle_type_sarima(pivot_truck, 'Truck')
    predicted_motorcycle = predict_vehicle_type_sarima(pivot_motorcycle, 'Motorcycle')

    # Unir las predicciones con los datos históricos
    final_auto = pivot_auto.join(predicted_auto, how='outer', rsuffix='_pred')
    final_bus = pivot_bus.join(predicted_bus, how='outer', rsuffix='_pred')
    final_truck = pivot_truck.join(predicted_truck, how='outer', rsuffix='_pred')
    final_motorcycle = pivot_motorcycle.join(predicted_motorcycle, how='outer', rsuffix='_pred')

    # Combinar en un DataFrame
    state_auto = final_auto.stack().reset_index().rename(columns={0: 'Auto'})
    state_bus = final_bus.stack().reset_index().rename(columns={0: 'Bus'})
    state_truck = final_truck.stack().reset_index().rename(columns={0: 'Truck'})
    state_motorcycle = final_motorcycle.stack().reset_index().rename(columns={0: 'Motorcycle'})

    combined_df = pd.merge(state_auto, state_bus, on=['state', 'year'], how='outer')
    combined_df = pd.merge(combined_df, state_truck, on=['state', 'year'], how='outer')
    combined_df = pd.merge(combined_df, state_motorcycle, on=['state', 'year'], how='outer')
    
    return combined_df

# Función para Mapear Estados y Guardar Datos
def mapear_y_guardar(combined_df, state_abbreviations):
    combined_df['state'] = combined_df['state'].map(state_abbreviations)
    combined_df['Total_Vehicles'] = combined_df[['Auto', 'Bus', 'Truck', 'Motorcycle']].sum(axis=1)
    combined_df.to_csv('vehicles_total_state-to2020.csv', index=False)

# Diccionario de abreviaturas de estados
state_abbreviations = {
    "Alabama": "AL", "Alaska": "AK", "Arizona": "AZ", "Arkansas": "AR", "California": "CA", 
    "Colorado": "CO", "Connecticut": "CT", "Delaware": "DE", "Florida": "FL", "Georgia": "GA", 
    "Hawaii": "HI", "Idaho": "ID", "Illinois": "IL", "Indiana": "IN", "Iowa": "IA", 
    "Kansas": "KS", "Kentucky": "KY", "Louisiana": "LA", "Maine": "ME", "Maryland": "MD", 
    "Massachusetts": "MA", "Michigan": "MI", "Minnesota": "MN", "Mississippi": "MS", 
    "Missouri": "MO", "Montana": "MT", "Nebraska": "NE", "Nevada": "NV", "New Hampshire": "NH", 
    "New Jersey": "NJ", "New Mexico": "NM", "New York": "NY", "North Carolina": "NC", 
    "North Dakota": "ND", "Ohio": "OH", "Oklahoma": "OK", "Oregon": "OR", "Pennsylvania": "PA", 
    "Rhode Island": "RI", "South Carolina": "SC", "South Dakota": "SD", "Tennessee": "TN", 
    "Texas": "TX", "Utah": "UT", "Vermont": "VT", "Virginia": "VA", "Washington": "WA", 
    "West Virginia": "WV", "Wisconsin": "WI", "Wyoming": "WY", "Dist. of Col.": "DC", "District of Columbia": "DC"
}

# Ejecución del script
url = 'https://drive.google.com/uc?id=1TO-oi55hcf4FbBHWFhvIMsaLhdA1jLLs'
data = descargar_y_cargar_datos(url)
data_since_1980 = transformar_datos(data)
combined_df = combinar_datos(data_since_1980)
mapear_y_guardar(combined_df, state_abbreviations)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_since_1980['year'].replace({2017: 1917}, inplace=True)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Too few observations to estimate starting parameters%s.'
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Too few observations to estimate starting parameters%s.'
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Too few observations to estimate starting parameters%s.'
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Too few observations to estimate starting parameters%s.'
  return get_prediction_index(

KeyError: 'year'