<a href="https://colab.research.google.com/github/cytric-74/gta6-sales-prediction-/blob/main/gta6_sales_prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
# %pip installs into the running kernel's environment; the version is pinned to
# the one this notebook was last executed with so re-runs stay reproducible.
%pip install -q vaderSentiment==3.3.2

Collecting vaderSentiment
  Downloading vaderSentiment-3.3.2-py2.py3-none-any.whl.metadata (572 bytes)
Downloading vaderSentiment-3.3.2-py2.py3-none-any.whl (125 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/126.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m [32m122.9/126.0 kB[0m [31m3.8 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m126.0/126.0 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: vaderSentiment
Successfully installed vaderSentiment-3.3.2


In [4]:
import os
import requests
import pandas as pd
from bs4 import BeautifulSoup
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from googleapiclient.discovery import build

In [5]:
# Create the output folder for the CSVs. exist_ok keeps this cell re-runnable:
# a plain `mkdir data` errors once the directory already exists.
os.makedirs("data", exist_ok=True)


# ***Twitter data collection***

In [None]:
# Bearer token for the Twitter API. Read it from the TWITTER_BEARER_TOKEN
# environment variable so the real secret never has to be saved in the notebook;
# the original placeholder is kept as the fallback value.
TOKKEN_TWIITER = os.environ.get("TWITTER_BEARER_TOKEN", "we need a token here")

In [None]:
def fetch_twitter_data(query, max_results=50):
    """Fetch recent tweets matching ``query`` and save them to CSV.

    Sends a GET request to the Twitter v2 recent-search endpoint, authorised
    with the bearer token in ``TOKKEN_TWIITER``, and writes the result to
    ``data/twitter_data.csv``.

    Args:
        query: Search string. It is passed through ``params`` so that spaces
            and other special characters are URL-encoded by ``requests``.
        max_results: Value sent as the ``max_results`` query parameter.

    Returns:
        pandas.DataFrame with the columns ``text``, ``created_at`` and
        ``likes``. When the response carries no ``data`` key the frame is
        empty but still has those columns, so the CSV keeps a header row.
    """
    headers = {"Authorization": f"Bearer {TOKKEN_TWIITER}"}
    url = "https://api.twitter.com/2/tweets/search/recent"
    params = {
        "query": query,
        "tweet.fields": "created_at,public_metrics",
        "max_results": max_results,
    }
    response = requests.get(url, headers=headers, params=params)

    # The JSON body is a dict: look the key up with .get (calling the dict
    # itself raises TypeError) and fall back to "no tweets".
    tweets = response.json().get('data', [])

    rows = [{
        'text': tweet['text'],
        'created_at': tweet['created_at'],
        'likes': tweet['public_metrics']['like_count']
    } for tweet in tweets]

    # Explicit columns keep the schema stable even when ``rows`` is empty.
    df = pd.DataFrame(rows, columns=['text', 'created_at', 'likes'])

    df.to_csv('data/twitter_data.csv', index=False)
    return df

# ***YouTube data collection***

In [6]:
from googleapiclient.discovery import build
import pandas as pd

In [None]:
# YouTube Data API key. Read it from the YOUTUBE_API_KEY environment variable so
# the real key is not stored in the notebook; the original placeholder is kept
# as the fallback value.
API_KEY = os.environ.get('YOUTUBE_API_KEY', 'need to get a youtube api key asap')

# Shared API client used by fetch_youtube_comments.
youtube = build('youtube', 'v3', developerKey=API_KEY)

In [None]:
def fetch_youtube_comments(query):
    """Collect top-level comments from the first videos matching ``query``.

    Searches for up to 5 videos with the module-level ``youtube`` client,
    requests up to 50 comment threads per video, and writes every top-level
    comment text to ``data/youtube_data.csv``.

    Args:
        query: Search string passed to the YouTube search endpoint.

    Returns:
        pandas.DataFrame with a single ``comment`` column.
    """
    # Local import: HttpError lives in the same package as ``build`` and is
    # only needed by this function.
    from googleapiclient.errors import HttpError

    search_response = youtube.search().list(
        q=query, part='snippet', type='video', maxResults=5
    ).execute()

    video_ids = [item['id']['videoId'] for item in search_response.get('items', [])]
    all_comments = []

    for vid in video_ids:
        try:
            comment_response = youtube.commentThreads().list(
                part='snippet', videoId=vid, textFormat='plainText', maxResults=50
            ).execute()
        except HttpError as err:
            # One failing video (e.g. comments disabled) should not abort the
            # whole collection; report it and move on to the next video.
            print(f"Skipping video {vid}: {err}")
            continue

        all_comments.extend(
            item['snippet']['topLevelComment']['snippet']['textDisplay']
            for item in comment_response.get("items", [])
        )

    df = pd.DataFrame({'comment': all_comments})
    df.to_csv('data/youtube_data.csv', index=False)
    return df

# ***Web Scraping***

---



In [None]:
def scrape_news(query):
    """Scrape Google News search results for ``query`` and save them to CSV.

    Downloads the search page, collects the headline and link of every
    ``article h3`` element that contains an anchor, and writes them to
    ``data/news_articles.csv``.

    Args:
        query: Search string. It is passed through ``params`` so that every
            special character (not only spaces) is URL-encoded.

    Returns:
        pandas.DataFrame with the columns ``title`` and ``link``. The columns
        are present even when nothing was scraped, so the CSV keeps a header.
    """
    from urllib.parse import urljoin

    base_url = "https://news.google.com/"
    response = requests.get(urljoin(base_url, "search"), params={"q": query})
    soup = BeautifulSoup(response.text, 'html.parser')

    articles = []
    for heading in soup.select('article h3'):
        anchor = heading.a
        href = anchor.get('href') if anchor else None
        if not href:
            # No anchor, or an anchor without a target: nothing to link to.
            continue
        articles.append({
            'title': heading.text.strip(),
            # urljoin resolves "./articles/..." the same way the old slice did
            # and also copes with "/..." and absolute hrefs.
            'link': urljoin(base_url, href),
        })

    df = pd.DataFrame(articles, columns=['title', 'link'])
    df.to_csv('data/news_articles.csv', index=False)
    return df

# ***Sentiment Analysis***

In [None]:
def analyze_sentiment(df, column):
    """Add a VADER compound ``sentiment`` score for every row of ``df``.

    Args:
        df: pandas.DataFrame holding the text to score. It is modified in
            place: a ``sentiment`` column is added (or overwritten).
        column: Name of the column containing the text. Values are cast to
            ``str`` before scoring.

    Returns:
        The same DataFrame, with the ``sentiment`` column set to the
        ``compound`` value returned by ``polarity_scores``.
    """
    # The notebook never creates an ``analyzer`` object, so build one here
    # instead of relying on a global that does not exist.
    analyzer = SentimentIntensityAnalyzer()
    df['sentiment'] = df[column].astype(str).apply(
        lambda text: analyzer.polarity_scores(text)['compound']
    )
    return df

# ***Model Prediction***

In [1]:
from sklearn.linear_model import LinearRegression

In [None]:
def _mean_sentiment(csv_path, column):
    """Return the mean sentiment of ``column`` in ``csv_path``, or 0.0 if there is no data."""
    try:
        scored = analyze_sentiment(pd.read_csv(csv_path), column)
    except pd.errors.EmptyDataError:
        # A collection step that found nothing may have left a CSV with no
        # header at all, which read_csv refuses to parse.
        print(f"No data in {csv_path}; using neutral sentiment 0.0")
        return 0.0

    mean_score = scored['sentiment'].mean()
    if pd.isna(mean_score):
        # Header but zero rows: the mean of an empty column is NaN, which
        # would make the regression fit fail.
        print(f"No rows in {csv_path}; using neutral sentiment 0.0")
        return 0.0
    return float(mean_score)


def combine_and_predict(baseline_sales=80):
    """Average the per-source sentiment and produce a demo sales prediction.

    Reads the three CSVs written by the collection steps, scores them with
    ``analyze_sentiment``, fits a ``LinearRegression`` on the single row of
    average sentiments, prints the prediction and writes everything to
    ``data/sales_prediction.csv``.

    Args:
        baseline_sales: Target value (in millions of units) used to fit the
            dummy model. Defaults to 80, the figure assumed for GTA 5.

    Returns:
        pandas.DataFrame with one row: the three average sentiments plus the
        ``predicted_sales`` column.
    """
    avg_sentiments = pd.DataFrame({
        'twitter_sentiment': [_mean_sentiment("data/twitter_data.csv", "text")],
        'youtube_sentiment': [_mean_sentiment("data/youtube_data.csv", "comment")],
        'news_sentiment': [_mean_sentiment("data/news_articles.csv", "title")]
    })

    # Dummy regression model for demonstration.
    # NOTE(review): it is fitted on a single sample, so the prediction is
    # expected to simply reproduce ``baseline_sales`` - confirm before relying
    # on the number.
    model = LinearRegression()
    X = avg_sentiments
    y = [baseline_sales]
    model.fit(X, y)
    prediction = model.predict(X)

    print("\nPredicted GTA 6 Sales (in millions):", round(prediction[0], 2))
    avg_sentiments['predicted_sales'] = prediction
    avg_sentiments.to_csv("data/sales_prediction.csv", index=False)
    return avg_sentiments

# ***Running***

In [None]:
if __name__ == "__main__":
    # Each pipeline stage is a (progress message, zero-argument action) pair,
    # executed strictly in the order listed: collect, collect, scrape, predict.
    stages = (
        ("Collecting Twitter data...", lambda: fetch_twitter_data("GTA 6")),
        ("Collecting YouTube comments...", lambda: fetch_youtube_comments("GTA 6 trailer")),
        ("Scraping news articles...", lambda: scrape_news("GTA 6")),
        ("Analyzing sentiment and predicting sales...", lambda: combine_and_predict()),
    )

    for message, run_stage in stages:
        print(message)
        run_stage()

    print("All done! Check the 'data/' folder for CSVs.")