# Project Cobra
### Logan Snyder, Iluda Ko, Brett Simmons, Joey Markun
This noetbook will show various statistics of vehicle sales at the Ames Ford dealership, and compare those to some national car data. It will also consider tweets made about certain cars and how that compares to how well that car is selling. 

In [6]:
import tweepy
from keys import *
import requests
import pandas as pd
from textblob import TextBlob

client = tweepy.Client(bearer_token, api_key, api_secret_key, access_token, access_token_secret)
client = tweepy.Client( bearer_token=bearer_token, 
                        consumer_key=api_key, 
                        consumer_secret=api_secret_key, 
                        access_token=access_token, 
                        access_token_secret=access_token_secret, 
                        return_type = requests.Response,
                        wait_on_rate_limit=True)
# Define query
query = '(Ford F150) lang:en'
# get tweets
tweets = client.search_recent_tweets(query=query, 
                                    tweet_fields=['created_at'],
                                     max_results=50)
# Save data as dictionary
tweets_dict = tweets.json() 
# Extract "data" value from dictionary
tweets_data = tweets_dict['data'] 
# Transform to pandas Dataframe
twitter_df = pd.json_normalize(tweets_data) 

# make polarity and subjectiveness rows, using textblob to calculate the values for each
twitter_df['polarity'] = twitter_df['text'].apply(lambda x: float(TextBlob(x).sentiment.polarity)) #-1 to 1(positive)
twitter_df['subjectiveness'] = twitter_df['text'].apply(lambda x: float(TextBlob(x).sentiment.subjectivity))

# Add another row to the dataframe called classification. 
twitter_df.loc[:, 'classification'] = 'nt' #set all rows to neutral first 
twitter_df.loc[twitter_df['polarity'] > 0.3, 'classification'] = 'pos' #set all rows above this threshold to pos
twitter_df.loc[twitter_df['polarity'] < -0.3, 'classification'] = 'neg' #set all rows to neg w/ polarity below this threshold 

twitter_df.head(5)

Unnamed: 0,created_at,id,text,polarity,subjectiveness,classification
0,2022-04-29T19:50:54.000Z,1520128512170070016,Construction is in full swing at Williams Ford...,0.321591,0.563636,pos
1,2022-04-29T19:45:54.000Z,1520127253522178048,RT @aminorjourney: I had such fun in this inte...,0.15,0.35,nt
2,2022-04-29T19:44:49.000Z,1520126981328842755,2011 Ford F250 Super Duty Crew Cab Lariat Pick...,0.333333,0.666667,pos
3,2022-04-29T19:40:07.000Z,1520125798279172096,The driver broke his back in the process https...,0.0,0.0,nt
4,2022-04-29T19:37:00.000Z,1520125014531624963,https://t.co/ovvuIZbj5t Take a look at this 20...,0.0,1.0,nt


In [2]:
data_df = pd.read_csv("https://raw.githubusercontent.com/iludako/final_project/main/MIS307%20Final%20Project%20Database.csv") 
data_df.head(3)


Unnamed: 0,StockNo,Customer ID,VehicleType,Model,CarTrim,DateSold,IsHybrid,IsDiesel,IsElectric,MPG,Range,Unnamed: 11,Unnamed: 12,Unnamed: 13,Unnamed: 14,Unnamed: 15,Unnamed: 16,Unnamed: 17
0,16517,7,Car,Mustang,EcoBoost,1/6/21,N,N,N,26.5,-,,,,,,,
1,16385,62,Car,Mustang,EcoBoost,2/18/21,N,N,N,26.5,-,,,,,,,
2,15687,185,Car,Mustang,EcoBoost,5/15/21,N,N,N,26.5,-,,,,,,,


In [3]:
# set dictionary for counting trim occurances
trim_count = {}
for i in data_df.index:
    trim = data_df.loc[i]['CarTrim']
    if trim in trim_count: 
        trim_count[trim] +=1
    else: 
        trim_count[trim] = 1
        
print(trim_count)

{'EcoBoost': 4, 'EcoBoost Premium': 3, 'GT': 4, 'GT Premium': 3, 'King Ranch': 13, 'Lariat': 21, 'Limited': 14, 'Platinum': 13, 'Plug-in Hybrid': 4, 'Police Inteceptor': 11, 'Hybrid Police Inteceptor': 10, 'Raptor': 4, 'S': 10, 'SE': 19, 'SE ': 1, 'SE Hybrid': 8, 'SEL': 19, 'Shelby GT350': 1, 'Shelby GT500': 1, 'ST': 3, 'Titanium': 10, 'Titanium Hybrid': 6, 'XL': 54, 'XL ': 2, 'XLT': 33}


In [4]:
# set dictionary for counting model occurances
model_count = {}
for i in data_df.index:
    model = data_df.loc[i]['Model']
    if model in model_count: 
        model_count[model] +=1
    else: 
        model_count[model] = 1
        
print(model_count)

{'Mustang': 16, 'F-150': 110, 'F-250': 7, 'F-350': 13, 'F-450': 3, 'Expedition': 6, 'Explorer': 38, 'F-550': 1, 'Fusion': 6, 'Escape': 61, 'EcoSport': 10}


In [10]:
print(f"The most popular selling Ford vehicle for the Ames dealership is the F-150, and it accounts for {model_count['F-150']/len(data_df.loc[:]) *100 :.2f}% of all sales at the dealership. ")

The most popular selling Ford vehicle for the Ames dealership is the F-150, and it accounts for 40.59% of all sales at the dealership. 
