In [178]:
import datetime
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from sklearn.linear_model import LinearRegression

In [89]:
# Show every column when a DataFrame is displayed (no truncation).
pd.options.display.max_columns = None

In [90]:
# Location of the processed listings CSV, relative to the notebook's working directory.
data_path = 'data/maverick_data_processed.csv'
# Load the full listings table; later cells filter and aggregate this frame.
df = pd.read_csv(filepath_or_buffer=data_path)

In [91]:
# Columns that together identify a listing when checking for duplicate rows.
dedupe_cols = [
    'condition',
    'model_year',
    'make',
    'model',
    'trim_level',
    'list_price',
    'exterior_color',
    'interior_color',
    'hybrid_or_eco',
    'mileage',
    'dealer_address',
]

In [73]:
#df.drop_duplicates(subset=dedupe_cols).shape # many duplicates
#len(df.date_parsed.unique()) # 11 days of parsing

11

In [96]:
# Use the first distinct `date_parsed` value (in order of appearance) as the
# snapshot date for the single-day plots below.
date = df['date_parsed'].unique()[0]

In [97]:
# generate box and whisker plot
def box_and_whisker(date):
    """Build a box-and-whisker plot of list price per trim level for one date.

    Reads the notebook-level ``df`` and keeps only the rows whose
    ``date_parsed`` equals ``date``.

    Parameters
    ----------
    date
        Value compared against ``df.date_parsed`` to select the rows to plot.
        It is also interpolated into the figure title.

    Returns
    -------
    The figure produced by ``px.box`` (all points shown, 1200x800).
    """
    # Passed as `category_orders` to set the x-axis ordering of trim levels.
    acceptable_trim_levels = ['XL','XLT','LARIAT']
    # Filter on the function argument rather than on a notebook-level variable,
    # so the function works on a fresh kernel and honours what the caller passes.
    single_date_data = df[df.date_parsed == date]
    num_cars = len(single_date_data)
    fig = px.box(single_date_data, x='trim_level', y='list_price', points='all', 
            title=f'Used Ford Mavericks Listed on Cars.com in USA from: {date}, {num_cars} cars shown', 
            category_orders={'trim_level':acceptable_trim_levels},
            labels={'trim_level':'Trim Level','list_price':'List Price'}, width=1200, height=800)
    return fig

In [98]:
# Plot the snapshot date stored in `date`; no `single_date` variable is defined in this notebook.
box_and_whisker(date)

In [192]:
def generate_mean_and_trend(df):
    """Compute the mean list price per parse date and a linear trend through it.

    Parameters
    ----------
    df
        Listings frame with at least ``date_parsed`` and ``list_price`` columns.

    Returns
    -------
    DataFrame with one row per ``date_parsed`` value and the columns
    ``date_parsed`` (converted with ``pd.to_datetime``), ``mean_list_price``
    (mean rounded to a whole number, stored as float), ``date_parsed_float``
    (timestamp used as the regression feature) and ``trend_line`` (fitted
    values of an ordinary least-squares line).
    """
    # Per-date mean of the numeric columns; only list_price is kept, rounded to whole units.
    daily_means = df.groupby(by='date_parsed').mean(numeric_only=True)
    mean_price = (
        daily_means['list_price']
        .round(0)
        .astype(int)
        .to_frame()
        .reset_index()
        .rename(columns={'list_price': 'mean_list_price'})
    )

    # Numeric representation of the dates so they can serve as a regression feature.
    # NOTE(review): for naive datetimes this conversion is interpreted in the
    # machine's local timezone - confirm that is acceptable.
    mean_price['date_parsed'] = pd.to_datetime(mean_price['date_parsed'])
    mean_price['date_parsed_float'] = mean_price['date_parsed'].apply(
        lambda stamp: datetime.datetime.timestamp(stamp)
    )
    mean_price['mean_list_price'] = mean_price['mean_list_price'].astype(float)

    # Single-column frames keep both the feature and the target two-dimensional.
    features = mean_price.loc[:, ['date_parsed_float']]
    target = mean_price.loc[:, ['mean_list_price']]

    regressor = LinearRegression()
    regressor.fit(features, target)

    # predict() returns one single-element row per date; unwrap each to a scalar.
    fitted = regressor.predict(features)
    mean_price['trend_line'] = [row[0] for row in fitted]
    return mean_price

In [194]:
# Aggregate the listings into per-date mean prices with a fitted linear trend.
mean_price = generate_mean_and_trend(df)

In [203]:
def plot_mean_price_over_time(df):
    """Plot the per-date mean list price together with its linear trend line.

    Parameters
    ----------
    df
        Listings frame; it is aggregated with ``generate_mean_and_trend``.

    Returns
    -------
    A ``go.Figure`` with a marker trace of the mean prices and a line trace
    of the fitted trend, both plotted against ``date_parsed``.
    """
    # Derive the aggregate from the argument so the figure reflects the frame the
    # caller passed instead of depending on a notebook-level `mean_price`.
    # No callback required for this plot unless you want to try filtering by trim level.
    mean_price = generate_mean_and_trend(df)

    dates = mean_price['date_parsed']
    prices = mean_price['mean_list_price']
    trend = mean_price['trend_line']

    fig = go.Figure()
    fig.add_trace(go.Scatter(x=dates, y=prices, mode='markers', name='Price (USD)'))
    fig.add_trace(go.Scatter(x=dates, y=trend, mode='lines', name='Trend'))
    fig.update_layout(
            title={'text':'Average Price for All Trim Levels Over Time'},
            xaxis_title={'text':'Date Data Parsed'},
            yaxis_title={'text':'Average Price'}
        )
    return fig
# Build the average-price figure for the full dataset and render it.
fig = plot_mean_price_over_time(df)
fig.show()

In [204]:
# Display the snapshot date currently stored in `date`.
date

'2023-05-11'

In [209]:
# Restrict the listings to the selected snapshot date.
dff = df.loc[df['date_parsed'] == date]
# Number of non-null `model_year` entries per trim level, as a two-column frame.
grouped = (
    dff.groupby(by='trim_level')
    .count()['model_year']
    .to_frame()
    .rename(columns={'model_year': 'count'})
    .reset_index(0)
)
# NOTE(review): this histograms the per-trim count values themselves; if the
# intent is "count per trim level", a bar chart of trim_level vs count may be
# what is wanted - confirm.
fig = go.Figure(go.Histogram(x=grouped['count']))
fig.show()



In [219]:
def generate_dailybar(df):
    """Build a bar chart of the number of listings per parse date.

    Parameters
    ----------
    df
        Listings frame with ``date_parsed`` and ``model_year`` columns.

    Returns
    -------
    A ``go.Figure`` with one bar per ``date_parsed`` value; each bar is
    labelled with its count.
    """
    # Count non-null `model_year` entries per date as the listing count.
    grouped = (
        df.groupby(by='date_parsed')
        .count()['model_year']
        .to_frame()
        .rename(columns={'model_year': 'count'})
        .reset_index()
    )
    counts = grouped['count']
    fig = go.Figure()
    fig.add_trace(go.Bar(
        x=grouped['date_parsed'],
        y=counts,
        # Label each bar with its own count; no `y` variable exists in this scope.
        text=counts,
        textposition='auto'
    ))
    fig.update_layout(
        title={'text':'Mavericks Available Over Time'},
        yaxis_title={'text':'Count'}
    )
    return fig
# Build the per-date listing-count bar chart for the full dataset and render it.
fig = generate_dailybar(df)
fig.show()

In [3]:
import os
# PORT from the environment (a string when set); falls back to the integer 5000.
os.getenv("PORT", 5000)

5000