In [7]:
import os
import time
import urllib.parse

import pandas as pd
import pyodbc
import requests

### Loading raw odometer data

In [None]:
# Read the odometer CSV (merged_all_years.csv) into `df` and report its row count.
file_path = "C:/Users/kaur6/Downloads/Urban Analytics/Odometer Data-Full/merged_all_years.csv"
df = pd.read_csv(filepath_or_buffer=file_path)
row_count = len(df.index)
print(f"Number of rows in the dataset: {row_count}")

In [None]:
import pyodbc

# Open a trusted (Windows-authenticated) connection to the vPICList_Lite1
# SQL Server database and create the cursor used to run the decoder procedure.
connection_string = 'DRIVER={SQL Server};SERVER=manpreets_asus;DATABASE=vPICList_Lite1;Trusted_Connection=yes;'
conn = pyodbc.connect(connection_string)
cursor = conn.cursor()

### Getting data for each VIN using NHTSA's VIN Decoder

In [None]:
# Decode every VIN in `first_500_vins` with the stored procedure
# [dbo].[spVinDecode], reshape the output to one row per VIN (one column per
# decoded variable) and write it to CSV.
# NOTE(review): `first_500_vins` is not defined in any cell visible in this
# notebook; it has to exist before this cell runs on a fresh kernel.

# List to store results (one dict per row returned by the decoder)
all_results = []

# Loop through each VIN and fetch results
for vin in first_500_vins:
    cursor.execute("EXEC [dbo].[spVinDecode] @v = ?", vin)
    decoded_rows = cursor.fetchall()

    if not decoded_rows:
        continue  # the decoder returned nothing for this VIN

    # Extract column names of the result set
    columns = [column[0] for column in cursor.description]

    # Append data with VIN included
    for row in decoded_rows:
        row_dict = dict(zip(columns, row))
        row_dict["VIN"] = vin  # Add VIN as a separate column
        all_results.append(row_dict)

# Close database connection. Any later cell that needs the database has to
# open its own connection after this point.
cursor.close()
conn.close()

# Convert results to DataFrame
df_results = pd.DataFrame(all_results)

# An empty frame has no "VIN"/"Variable" columns, so the groupby below would
# fail with an unhelpful KeyError; stop with a clear message instead.
if df_results.empty:
    raise ValueError("No VINs were decoded; nothing to pivot or save.")

# Collapse duplicate (VIN, Variable) pairs before pivoting. Missing values are
# dropped first so they are not written out as the literal text "None"
# (presumably undesirable downstream -- confirm against the decoder output).
df_results = (
    df_results
    .groupby(["VIN", "Variable"])["Value"]
    .agg(lambda x: "; ".join(x.dropna().astype(str)))
    .reset_index()
)

# Pivot the DataFrame: Convert 'Variable' column into column headers
df_pivot = df_results.pivot(index="VIN", columns="Variable", values="Value")

# Turn the VIN index back into a regular column
df_pivot = df_pivot.reset_index()

# Save to CSV
output_file = "C:/Users/kaur6/Downloads/Urban Analytics/Odometer Data/illinois_2023/decoded_vins_results.csv"
df_pivot.to_csv(output_file, index=False)

print(f"Results saved to {output_file}")

In [8]:
# Where the MPG lookup results will be written
output_csv = "C:/Users/kaur6/Downloads/Urban Analytics/Odometer Data/illinois_2023/vin_to_mpg_correct.csv"

# Decoded-VIN table produced earlier; loaded into `df` for the MPG lookup
input_csv = "C:/Users/kaur6/Downloads/Urban Analytics/Odometer Data/illinois_2023/decoded_vins_results.csv"
df = pd.read_csv(filepath_or_buffer=input_csv)

In [9]:
# Pull the three columns needed for the MPG lookup out of `df` as plain lists
models, model_years, vins = (
    df[column_name].tolist() for column_name in ('Model', 'Model Year', 'VIN')
)

# Accumulator for the per-VIN lookup results
results = list()

### Getting the MPG value using the API Ninjas Cars API

In [10]:
# Look up the combined MPG for every (model, year, VIN) row via the
# API Ninjas cars endpoint and save one result row per VIN to `output_csv`.

# Read the key from the environment so it is never stored in the notebook.
api_key = os.environ.get('API_NINJAS_KEY')
if not api_key:
    raise RuntimeError("Set the API_NINJAS_KEY environment variable before running this cell.")

api_url = 'https://api.api-ninjas.com/v1/cars'
headers = {'X-Api-Key': api_key}  # loop-invariant, so built once
request_timeout = 30  # seconds; without a timeout a stalled request hangs the whole loop

# Start from an empty list so re-running this cell does not duplicate rows.
results = []

# Successful lookups keyed by (model, year): identical vehicles are only
# requested once. Failures are not cached so they are retried on the next row.
mpg_cache = {}

# Loop through each model and make the API request
for model, year, vin in zip(models, model_years, vins):
    # A year column containing missing values is read back as float, which
    # would put e.g. "2016.0" in the query; send whole years as integers.
    year_param = int(year) if isinstance(year, float) and year.is_integer() else year
    cache_key = (model, year_param)

    if cache_key in mpg_cache:
        combination_mpg = mpg_cache[cache_key]
    else:
        try:
            # `params` lets requests URL-encode the values (spaces, '&', ...).
            response = requests.get(
                api_url,
                headers=headers,
                params={'model': model, 'year': year_param},
                timeout=request_timeout,
            )

            if response.status_code == requests.codes.ok:
                car_data = response.json()
                combination_mpg = car_data[0].get('combination_mpg', 'Combination MPG not available') if car_data else 'No data found'
                mpg_cache[cache_key] = combination_mpg
            else:
                combination_mpg = f"Error {response.status_code}"

        # Deliberately broad: one bad row must not abort the whole batch; the
        # failure reason is recorded in the output instead.
        except Exception as e:
            combination_mpg = f"Request failed: {str(e)}"

    results.append({'VIN': vin, 'Model': model, 'Model Year': year, 'Combination MPG': combination_mpg})

results_df = pd.DataFrame(results)
results_df.to_csv(output_csv, index=False)

print(f"Results saved to {output_csv}")

Results saved to C:/Users/kaur6/Downloads/Urban Analytics/Odometer Data/illinois_2023/vin_to_mpg_correct.csv


In [3]:
# Interactive check: look up the combined MPG for one model/year typed in by
# the user and print it.

# Read the key from the environment so it is never stored in the notebook.
api_key = os.environ.get('API_NINJAS_KEY')
if not api_key:
    raise RuntimeError("Set the API_NINJAS_KEY environment variable before running this cell.")

# Take model input from the user; strip stray whitespace around the typed values
model_input = input("Enter the model you want to query: ").strip()
model_year = input("Enter the model year you want to query: ").strip()

# Base endpoint; the query values are passed via `params` so requests
# URL-encodes them (model names may contain spaces or '&').
api_url = 'https://api.api-ninjas.com/v1/cars'

# Send the API request (with a timeout so a stalled request cannot hang the cell)
response = requests.get(
    api_url,
    headers={'X-Api-Key': api_key},
    params={'model': model_input, 'year': model_year},
    timeout=30,
)

# Initialize the results list
results = []

if response.status_code == requests.codes.ok:
    car_data = response.json()
    # Extract the 'combination_mpg' value from the first returned entry
    if car_data:
        combination_mpg = car_data[0].get('combination_mpg', 'Combination MPG not available')
    else:
        combination_mpg = 'No data found'
else:
    combination_mpg = f"Error {response.status_code}"

# Append the result for the single model
results.append({'model': model_input, 'combination_mpg': combination_mpg})

# Print the result
print(f"Model: {model_input}, Combination MPG: {combination_mpg}")

Enter the model you want to query:  M235i
Enter the model year you want to query:  2016


Model: M235i, Combination MPG: 24


### Getting the fuel type for the vehicle

In [None]:
# Read the merged dataset into `df` (adjust the path for your machine)
file_path = "C:/Users/kaur6/Downloads/Urban Analytics/Odometer Data-Full/Everything_Merged_dataset.csv"
df = pd.read_csv(filepath_or_buffer=file_path)

# Show the available columns, then the number of rows
print(df.columns, len(df), sep="\n")

In [None]:
# Decode the primary fuel type for every VIN in `df` with the stored procedure
# [dbo].[spVinDecode], writing a checkpoint CSV after each chunk of rows.

# Create a new column for Fuel Type if it doesn't exist
if 'Fuel_Type' not in df.columns:
    df['Fuel_Type'] = None

chunk_size = 100000
output_file_path = "C:/Users/kaur6/Downloads/Urban Analytics/Odometer Data-Full/VIN_FuelType_Processed.csv"

# Open a dedicated connection for this cell: the VIN-decoding cell earlier in
# the notebook closes the shared cursor/connection, so reusing them here fails
# when the notebook is run top to bottom.
conn = pyodbc.connect('DRIVER={SQL Server};SERVER=manpreets_asus;DATABASE=vPICList_Lite1;Trusted_Connection=yes;')
cursor = conn.cursor()

# Decoder results per VIN so a VIN that appears on many rows is decoded once.
# NOT_DECODED marks VINs whose result had no 'Fuel Type - Primary' row.
NOT_DECODED = object()
fuel_type_by_vin = {}

try:
    # Iterate through VINs in chunks
    for start in range(0, len(df), chunk_size):
        end = min(start + chunk_size, len(df))
        chunk = df.iloc[start:end]

        # Only the 'vin' column is needed, so iterate that Series directly.
        for index, vin in chunk['vin'].items():  # Ensure 'vin' matches the exact column name in your dataset
            if pd.isna(vin):
                continue  # nothing to decode for a missing VIN

            if vin not in fuel_type_by_vin:
                cursor.execute("EXEC [dbo].[spVinDecode] @v = ?", vin)
                # Positional access: assumes column 1 is the variable name and
                # column 2 its value in the procedure's result set -- TODO confirm.
                fuel_type_by_vin[vin] = next(
                    (res[2] for res in cursor.fetchall() if res[1] == 'Fuel Type - Primary'),
                    NOT_DECODED,
                )

            fuel_type = fuel_type_by_vin[vin]
            if fuel_type is not NOT_DECODED:
                df.at[index, 'Fuel_Type'] = fuel_type

        # Checkpoint: save the 'vin' and 'Fuel_Type' columns for all rows processed so far
        df.iloc[:end][['vin', 'Fuel_Type']].to_csv(output_file_path, index=False)

        print(f"Processed rows {start} to {end}. Results saved.")
finally:
    # Close connection even if decoding fails part-way through
    cursor.close()
    conn.close()

print(f"Final dataset saved at: {output_file_path}")