<a href="https://colab.research.google.com/github/akint980/Vegamour_Data_Analysis/blob/main/Tweetlens.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

1. Generating tweets related to Vegamour hair products in order to gain insight into customer feedback, which can be used to improve the products. The dataset combines synthetic tweets generated with Transformers and real tweets retrieved from X through the X API. The code below fetches the real tweets from X using an X API bearer token.

In [None]:


import csv
import os
import random
import re
from datetime import datetime

import tweepy

# -----------------------------
# 🔐 X API Bearer Token (read from the environment)
# -----------------------------
# The bearer token is a credential and must not be stored in the notebook.
# Provide it through the X_BEARER_TOKEN environment variable before running.
# NOTE(review): the token that was previously committed here should be
# regenerated in the X developer portal, since it is exposed in history.
BEARER_TOKEN = os.environ.get("X_BEARER_TOKEN")
if not BEARER_TOKEN:
    raise RuntimeError(
        "X_BEARER_TOKEN is not set; export your X API bearer token before running."
    )

# wait_on_rate_limit=True asks tweepy to wait when the rate limit is reached
# rather than failing the request.
client = tweepy.Client(bearer_token=BEARER_TOKEN, wait_on_rate_limit=True)

# -----------------------------
# 🧼 Clean tweet text
# -----------------------------
def clean_tweet(text):
    """Return *text* stripped of links, mentions and hashtags, capped at 280 chars.

    URLs are removed first, then ``@mention`` / ``#hashtag`` tokens, then runs
    of whitespace are collapsed to single spaces and the ends trimmed. A result
    longer than 280 characters is cut to 277 characters plus ``"..."``.
    """
    without_urls = re.sub(r'https?://\S+', '', text)
    without_tags = re.sub(r'[@#]\w+', '', without_urls)
    collapsed = re.sub(r'\s+', ' ', without_tags).strip()

    if len(collapsed) <= 280:
        return collapsed
    # 277 kept characters + 3 dots keeps the result at exactly 280.
    return collapsed[:277] + "..."

# -----------------------------
# 📦 Metadata Pools
# -----------------------------
# Candidate values for the product / emotion / intent columns.
# fetch_vegamour_tweets picks one entry from each list at random per row.
products = [
    "Vegamour GRO Serum",
    "Vegamour Biotin Gummies",
    "Vegamour Scalp Detox",
    "Vegamour Dry Shampoo",
]
emotions = [
    "Joy",
    "Frustration",
    "Nostalgia",
    "Gratitude",
]
intents = [
    "Praise",
    "Complaint",
    "Testimonial",
    "Recommendation",
]

# -----------------------------
# 🔍 Fetch real tweets from X
# -----------------------------
def fetch_vegamour_tweets(query="vegamour", max_results=300):
    """Fetch recent tweets matching *query* and return them as CSV-ready rows.

    Args:
        query: Search query sent to the recent-search endpoint.
        max_results: Upper bound on the total number of tweets returned.
            Pages are requested until this many tweets have been collected
            or the search has no more results.

    Returns:
        A list of rows ``[row_id, timestamp, tweet_text, product, emotion,
        intent]``. ``row_id`` is a 1-based counter, ``timestamp`` is the
        tweet's ``created_at`` in ISO format and ``tweet_text`` is the output
        of ``clean_tweet``. The product, emotion and intent values are chosen
        at random from the module-level pools; they are not derived from the
        tweet content. Returns an empty list (after printing a notice) when
        nothing is found.
    """
    # A single request returns at most 100 tweets (and, per the X API docs,
    # at least 10 must be requested), so the total is reached by paginating.
    page_size = min(100, max(10, max_results))
    paginator = tweepy.Paginator(
        client.search_recent_tweets,
        query=query,
        max_results=page_size,
        tweet_fields=["created_at", "text"],
    )

    tweets_data = []
    # flatten() walks the pages and stops after `limit` tweets in total.
    for row_id, tweet in enumerate(paginator.flatten(limit=max_results), start=1):
        tweets_data.append([
            row_id,  # prompt_id replacement
            tweet.created_at.isoformat(),
            clean_tweet(tweet.text),
            random.choice(products),
            random.choice(emotions),
            random.choice(intents),
        ])

    if not tweets_data:
        print("No tweets found.")
    return tweets_data

# -----------------------------
# 💾 Save tweets to CSV
# -----------------------------
def save_to_csv(rows, output_file="vegamour_real_tweets.csv"):
    """Write *rows* to *output_file* as UTF-8 CSV beneath a fixed header row.

    Any existing file at *output_file* is overwritten. After writing, a
    confirmation line containing the number of rows is printed.
    """
    header = [
        "prompt_id",
        "timestamp",
        "tweet_text",
        "product_mention",
        "emotion_tag",
        "intent_tag",
    ]
    # newline="" leaves line-ending handling to the csv module.
    with open(output_file, "w", newline="", encoding="utf-8") as handle:
        out = csv.writer(handle)
        out.writerow(header)
        out.writerows(rows)

    row_count = len(rows)
    print(f"✅ Saved {row_count} real tweets to {output_file}")

# -----------------------------
# 🚀 Main Execution
# -----------------------------
if __name__ == "__main__":
    # Fetch with the default query, then write the CSV only when at least one
    # row came back, so an empty search does not produce a header-only file.
    real_tweets = fetch_vegamour_tweets()
    if real_tweets:
        save_to_csv(real_tweets)





✅ Saved 6 real tweets to vegamour_real_tweets.csv


✅ Saved 6 real tweets to vegamour_real_tweets.csv


2. Generating synthetic tweets with the Hugging Face Transformers library (GPT-2).

In [None]:

import random
import re
from datetime import datetime, timedelta
from transformers import pipeline

# Build the GPT-2 text-generation pipeline once, up front; generate_tweets
# reuses this single instance for every prompt.
generator = pipeline(task="text-generation", model="gpt2")

# Metadata pools: one value from each list is drawn at random for every
# generated row (product_mention, emotion_tag, intent_tag columns).
products = [
    "Vegamour GRO Serum",
    "Vegamour Biotin Gummies",
    "Vegamour Scalp Detox",
    "Vegamour Dry Shampoo",
]
emotions = [
    "Joy",
    "Frustration",
    "Nostalgia",
    "Gratitude",
]
intents = [
    "Praise",
    "Complaint",
    "Testimonial",
    "Recommendation",
]

# Cleaning function for generated tweets
def clean_tweet(text):
    """Clean one generated text so it reads like a standalone tweet.

    Steps, in order: remove echoed prompt text, remove URLs, remove
    ``@mention`` / ``#hashtag`` tokens, collapse whitespace, and truncate to
    280 characters (277 characters plus ``"..."``).

    Args:
        text: Raw text as returned by the generator.

    Returns:
        The cleaned text, at most 280 characters long.
    """
    # Remove prompt echoes. The prompt built by the generation loop has the
    # form "Vegamour hair product tweet #<n>"; it must be stripped before the
    # hashtag pass below, which would otherwise eat only the "#<n>" part and
    # leave the rest of the prompt in every tweet.
    text = re.sub(
        r'Vegamour\s+hair\s+product\s+tweet\s*#\d+[:\-]*\s*',
        '',
        text,
        flags=re.IGNORECASE,
    )
    # Older prompt wording, kept so previously supported input still cleans up.
    text = re.sub(r'Tweet\s*#\d+\s*about\s*Vegamour[:\-]*\s*', '', text, flags=re.IGNORECASE)
    # Remove URLs, hashtags, mentions
    text = re.sub(r'https?://\S+', '', text)
    text = re.sub(r'[@#]\w+', '', text)
    # Normalize whitespace
    text = re.sub(r'\s+', ' ', text).strip()
    # Limit length (max 280 chars for tweets)
    if len(text) > 280:
        text = text[:277] + "..."
    return text

# Generate tweets function
def generate_tweets(prompt, num_tweets):
    """Sample *num_tweets* texts from the generator for *prompt*, cleaned.

    Args:
        prompt: Text handed to the text-generation pipeline.
        num_tweets: Number of sequences requested from the pipeline.

    Returns:
        A list with ``clean_tweet`` applied to each result's
        ``"generated_text"``.
    """
    # Sampling configuration: top-k / nucleus sampling with a length cap of 60.
    sampling_options = {
        "max_length": 60,
        "num_return_sequences": num_tweets,
        "do_sample": True,
        "top_k": 50,
        "top_p": 0.95,
    }

    cleaned = []
    for candidate in generator(prompt, **sampling_options):
        cleaned.append(clean_tweet(candidate["generated_text"]))
    return cleaned

# Build the synthetic dataset: num_prompts prompts, tweets_per_prompt tweets each.
all_rows = []
num_prompts = 30
tweets_per_prompt = 30
# Base instant for the synthetic timestamps (1 July 2024, 08:00).
start_time = datetime(2024, 7, 1, 8, 0, 0)

for prompt_id in range(1, num_prompts + 1):
    prompt = f"Vegamour hair product tweet #{prompt_id}"
    synthetic_tweets = generate_tweets(prompt, tweets_per_prompt)

    # Each prompt owns a 50-minute slot; tweets inside it are spaced one
    # minute apart, so 30 tweets per prompt never spill into the next slot.
    slot_start = start_time + timedelta(minutes=prompt_id * 50)

    for i, tweet_text in enumerate(synthetic_tweets):
        timestamp = slot_start + timedelta(minutes=i)
        row = [
            prompt_id,
            timestamp.isoformat(),
            tweet_text,
            random.choice(products),
            random.choice(emotions),
            random.choice(intents),
        ]
        all_rows.append(row)

# Save generated tweets to CSV
# This cell's own import list does not include csv; importing it here lets the
# cell run on its own instead of depending on an earlier cell having run.
import csv

output_file = "vegamour_hair_product_tweets_2024.csv"
# newline="" leaves line-ending handling to the csv module.
with open(output_file, "w", newline="", encoding="utf-8") as f:
    writer = csv.writer(f)
    writer.writerow(["prompt_id", "timestamp", "tweet_text", "product_mention", "emotion_tag", "intent_tag"])
    writer.writerows(all_rows)

print(f"✅ Saved {len(all_rows)} Vegamour hair product tweets to {output_file}")
