In [33]:
#import libraries for nlp
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import joblib
import os
import time
from datetime import datetime


In [34]:
#show all rows
pd.set_option('display.max_rows', None)


In [35]:
import pandas as pd

# Proper column names for Sentiment140 dataset
columns = ['target', 'id', 'date', 'query', 'user', 'text']

df = pd.read_csv(
    "tweets.csv",
    encoding='latin-1',
    names=columns
)

print(df.head())
print(df.info())


   target          id                          date     query  \
0       0  1467810369  Mon Apr 06 22:19:45 PDT 2009  NO_QUERY   
1       0  1467810672  Mon Apr 06 22:19:49 PDT 2009  NO_QUERY   
2       0  1467810917  Mon Apr 06 22:19:53 PDT 2009  NO_QUERY   
3       0  1467811184  Mon Apr 06 22:19:57 PDT 2009  NO_QUERY   
4       0  1467811193  Mon Apr 06 22:19:57 PDT 2009  NO_QUERY   

              user                                               text  
0  _TheSpecialOne_  @switchfoot http://twitpic.com/2y1zl - Awww, t...  
1    scotthamilton  is upset that he can't update his Facebook by ...  
2         mattycus  @Kenichan I dived many times for the ball. Man...  
3          ElleCTF    my whole body feels itchy and like its on fire   
4           Karoli  @nationwideclass no, it's not behaving at all....  
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1600000 entries, 0 to 1599999
Data columns (total 6 columns):
 #   Column  Non-Null Count    Dtype 
---  ------  --------------

In [36]:
df.head()

Unnamed: 0,target,id,date,query,user,text
0,0,1467810369,Mon Apr 06 22:19:45 PDT 2009,NO_QUERY,_TheSpecialOne_,"@switchfoot http://twitpic.com/2y1zl - Awww, t..."
1,0,1467810672,Mon Apr 06 22:19:49 PDT 2009,NO_QUERY,scotthamilton,is upset that he can't update his Facebook by ...
2,0,1467810917,Mon Apr 06 22:19:53 PDT 2009,NO_QUERY,mattycus,@Kenichan I dived many times for the ball. Man...
3,0,1467811184,Mon Apr 06 22:19:57 PDT 2009,NO_QUERY,ElleCTF,my whole body feels itchy and like its on fire
4,0,1467811193,Mon Apr 06 22:19:57 PDT 2009,NO_QUERY,Karoli,"@nationwideclass no, it's not behaving at all...."


In [37]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1600000 entries, 0 to 1599999
Data columns (total 6 columns):
 #   Column  Non-Null Count    Dtype 
---  ------  --------------    ----- 
 0   target  1600000 non-null  int64 
 1   id      1600000 non-null  int64 
 2   date    1600000 non-null  object
 3   query   1600000 non-null  object
 4   user    1600000 non-null  object
 5   text    1600000 non-null  object
dtypes: int64(2), object(4)
memory usage: 73.2+ MB


In [38]:
df.shape

(1600000, 6)

In [39]:
df.head()

Unnamed: 0,target,id,date,query,user,text
0,0,1467810369,Mon Apr 06 22:19:45 PDT 2009,NO_QUERY,_TheSpecialOne_,"@switchfoot http://twitpic.com/2y1zl - Awww, t..."
1,0,1467810672,Mon Apr 06 22:19:49 PDT 2009,NO_QUERY,scotthamilton,is upset that he can't update his Facebook by ...
2,0,1467810917,Mon Apr 06 22:19:53 PDT 2009,NO_QUERY,mattycus,@Kenichan I dived many times for the ball. Man...
3,0,1467811184,Mon Apr 06 22:19:57 PDT 2009,NO_QUERY,ElleCTF,my whole body feels itchy and like its on fire
4,0,1467811193,Mon Apr 06 22:19:57 PDT 2009,NO_QUERY,Karoli,"@nationwideclass no, it's not behaving at all...."


In [40]:
df['target'].value_counts()

target
0    800000
4    800000
Name: count, dtype: int64

In [41]:
df.head()

Unnamed: 0,target,id,date,query,user,text
0,0,1467810369,Mon Apr 06 22:19:45 PDT 2009,NO_QUERY,_TheSpecialOne_,"@switchfoot http://twitpic.com/2y1zl - Awww, t..."
1,0,1467810672,Mon Apr 06 22:19:49 PDT 2009,NO_QUERY,scotthamilton,is upset that he can't update his Facebook by ...
2,0,1467810917,Mon Apr 06 22:19:53 PDT 2009,NO_QUERY,mattycus,@Kenichan I dived many times for the ball. Man...
3,0,1467811184,Mon Apr 06 22:19:57 PDT 2009,NO_QUERY,ElleCTF,my whole body feels itchy and like its on fire
4,0,1467811193,Mon Apr 06 22:19:57 PDT 2009,NO_QUERY,Karoli,"@nationwideclass no, it's not behaving at all...."


In [42]:
import re
import nltk
from nltk.corpus import stopwords

nltk.download('stopwords')
stop_words = set(stopwords.words('english'))

def clean_text(text):
    text = text.lower()
    text = re.sub(r'http\S+', '', text)  # remove URLs
    text = re.sub(r'@\w+', '', text)     # remove mentions
    text = re.sub(r'#\w+', '', text)     # remove hashtags
    text = re.sub(r'[^a-z\s]', '', text) # keep only letters
    text = ' '.join([word for word in text.split() if word not in stop_words])
    return text

df['clean_text'] = df['text'].apply(clean_text)


[nltk_data] Downloading package stopwords to C:\Users\Furqan
[nltk_data]     Khan\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [43]:
df.head()

Unnamed: 0,target,id,date,query,user,text,clean_text
0,0,1467810369,Mon Apr 06 22:19:45 PDT 2009,NO_QUERY,_TheSpecialOne_,"@switchfoot http://twitpic.com/2y1zl - Awww, t...",awww thats bummer shoulda got david carr third...
1,0,1467810672,Mon Apr 06 22:19:49 PDT 2009,NO_QUERY,scotthamilton,is upset that he can't update his Facebook by ...,upset cant update facebook texting might cry r...
2,0,1467810917,Mon Apr 06 22:19:53 PDT 2009,NO_QUERY,mattycus,@Kenichan I dived many times for the ball. Man...,dived many times ball managed save rest go bounds
3,0,1467811184,Mon Apr 06 22:19:57 PDT 2009,NO_QUERY,ElleCTF,my whole body feels itchy and like its on fire,whole body feels itchy like fire
4,0,1467811193,Mon Apr 06 22:19:57 PDT 2009,NO_QUERY,Karoli,"@nationwideclass no, it's not behaving at all....",behaving im mad cant see


In [44]:
from sklearn.feature_extraction.text import TfidfVectorizer

vectorizer = TfidfVectorizer(max_features=5000, ngram_range=(1,2))
X = vectorizer.fit_transform(df['clean_text'])
y = df['target']


In [45]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [46]:


model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred))


              precision    recall  f1-score   support

           0       0.79      0.75      0.77    159494
           4       0.76      0.80      0.78    160506

    accuracy                           0.78    320000
   macro avg       0.78      0.78      0.78    320000
weighted avg       0.78      0.78      0.78    320000



In [53]:
positive_tweets = [
    "Bitcoin is going to the moon 🚀🚀",
    "BTC is the future of money 🌎",
    "Feeling amazing about my crypto gains today!",
    "Ethereum upgrade is a huge success 🎉",
    "Bull run incoming, stay ready! 📈",
    "I love the crypto community ❤️",
    "Holding my BTC strong 💪",
    "Just bought more Bitcoin, best investment ever!",
    "Crypto market looking green today 🌱",
    "So happy I invested early in ETH!",
    "Profits keep growing every week 🔥",
    "This new blockchain project is brilliant!",
    "Altcoins are pumping hard today 📊",
    "NFTs are changing the art world for the better 🎨",
    "I believe in decentralization 🌐",
    "So proud to be part of this revolution!",
    "Crypto adoption is unstoppable ⚡",
    "Every dip is just another buying opportunity 😉",
    "Made my first 2x profit in crypto today 😍",
    "BTC has secured my financial freedom 🙌",
    "Love seeing mainstream companies accept Bitcoin!",
    "Trading crypto is so exciting 💹",
    "Amazing how blockchain solves real problems",
    "This project will disrupt industries!",
    "Feeling rich after today’s pump 😂",
    "Crypto is making me hopeful for the future 💫",
    "Proud to be an early adopter 🚀",
    "Happy to see regulations going pro-crypto",
    "Met awesome people in the crypto space 🤝",
    "Made huge returns from staking ETH 💵",
    "So motivated after today’s gains!",
    "Crypto gives me control over my money 🔑",
    "Another day, another profit 📈",
    "Excited to see crypto going mainstream 🎯",
    "I trust BTC more than banks",
    "What a bullish chart! 📊",
    "Crypto innovation never stops 💡",
    "Bitcoin saved my portfolio!",
    "Love the energy of the bull market ⚡",
    "This is financial freedom 💸",
    "Crypto is the best investment I’ve ever made",
    "Happy that I didn’t sell the dip 😎",
    "Crypto = Future 🚀",
    "Everything is pumping today 🔥",
    "So optimistic about the future of DeFi",
    "Love learning about new crypto projects",
    "Investing in blockchain feels amazing",
    "The growth of this space is inspiring 🌟",
    "Crypto is my passion ❤️",
    "Another all-time high! 🎉",
    "Crypto winter is over ☀️",
    "So confident about this bull cycle",
    "Made enough profit to buy a car 🚗",
    "Crypto keeps surprising me positively",
    "Feeling like a genius for buying BTC early",
    "The best decision of my life: Bitcoin",
    "Every day I get more bullish!",
    "Crypto is here to stay",
    "Excited for the halving event 🔥",
    "Happy to be my own bank 🏦",
    "Positive vibes only in the market today 🌈",
    "Bullish candles everywhere 📈",
    "Crypto community is so supportive",
    "Love this decentralized world",
    "So many opportunities in this market",
    "Crypto gives me hope for the economy",
    "Financial freedom is real with Bitcoin",
    "Crypto has changed my life 🙌",
    "Alt season is here! 🚀",
    "Proud to be part of Web3",
    "Excited about the metaverse trend",
    "Crypto is empowering people globally 🌍",
    "Just made a killer trade 🔥",
    "Hodling is the best strategy 😎",
    "This chart looks so bullish 📊",
    "Feeling grateful for crypto",
    "Crypto = innovation + freedom",
    "DeFi is the future of finance",
    "Bitcoin protects my wealth",
    "So happy about my passive income",
    "Crypto adoption growing daily 🚀",
    "Bullish momentum is unstoppable",
    "Made more money in crypto than stocks",
    "Excited about blockchain gaming",
    "Crypto makes me optimistic",
    "Crypto is the best hedge against inflation",
    "Every day I learn something new in crypto",
    "This bull market feels legendary!",
    "Feeling lucky to be in this space",
    "Crypto is revolutionizing everything",
    "So bullish about ETH scaling solutions",
    "Crypto freedom is priceless 💎",
    "Lambo dreams are real 🚘",
    "Crypto market is full of opportunities",
    "My portfolio keeps growing 🚀",
    "Happy traders today 😊",
    "This dip recovery is so bullish",
    "Crypto is financial independence",
    "Feeling confident about BTC’s future",
    "Crypto is unstoppable innovation",
    "Love watching crypto grow daily"
]
#also add if 4 then positive and if 0 then negative
# ✅ Test with your positive_tweets list
sample_clean = [clean_text(t) for t in positive_tweets]   # preprocess
sample_vec = vectorizer.transform(sample_clean)           # vectorize
predictions = model.predict(sample_vec)                   # predict

# Print predictions alongside text
for text, pred in zip(positive_tweets, predictions):
    sentiment = "Positive 😀 (4)" if pred == 4 else "Negative 😡 (0)"
    print(f"{sentiment} --> {text}")


Positive 😀 (4) --> Bitcoin is going to the moon 🚀🚀
Positive 😀 (4) --> BTC is the future of money 🌎
Positive 😀 (4) --> Feeling amazing about my crypto gains today!
Positive 😀 (4) --> Ethereum upgrade is a huge success 🎉
Positive 😀 (4) --> Bull run incoming, stay ready! 📈
Positive 😀 (4) --> I love the crypto community ❤️
Positive 😀 (4) --> Holding my BTC strong 💪
Positive 😀 (4) --> Just bought more Bitcoin, best investment ever!
Positive 😀 (4) --> Crypto market looking green today 🌱
Positive 😀 (4) --> So happy I invested early in ETH!
Positive 😀 (4) --> Profits keep growing every week 🔥
Positive 😀 (4) --> This new blockchain project is brilliant!
Negative 😡 (0) --> Altcoins are pumping hard today 📊
Positive 😀 (4) --> NFTs are changing the art world for the better 🎨
Positive 😀 (4) --> I believe in decentralization 🌐
Positive 😀 (4) --> So proud to be part of this revolution!
Positive 😀 (4) --> Crypto adoption is unstoppable ⚡
Positive 😀 (4) --> Every dip is just another buying opportunity 

In [55]:
negative_tweets = [
    "Crypto crash, lost all my money 😭",
    "Bitcoin is such a scam!",
    "Feeling hopeless after losing my savings in crypto",
    "Another rug pull, I’m done with altcoins 😡",
    "ETH gas fees are ridiculous!",
    "Crypto is nothing but gambling 🎲",
    "Wish I never bought Bitcoin 😢",
    "This bear market is killing me",
    "Lost everything in the crash 💔",
    "Trading crypto makes me so anxious",
    "Market manipulation everywhere 😠",
    "Scammers everywhere in this space",
    "Bitcoin will never recover",
    "Crypto makes me depressed",
    "I regret investing in this ponzi",
    "These charts look awful 📉",
    "So tired of losing money",
    "Every pump is followed by a huge dump",
    "Crypto influencers are liars 🤥",
    "I lost faith in blockchain projects",
    "Why did I fall for this scam?",
    "Crypto trading ruined my mental health",
    "No hope left for BTC",
    "Crypto is dead 💀",
    "This market is trash",
    "Lost all my profits in one day 😞",
    "Bear markets are the worst",
    "I hate how volatile crypto is",
    "Every day is another loss 📉",
    "Scammed again by a fake project",
    "Crypto community is toxic",
    "Nothing but losses in this market",
    "Wish I had stayed out of crypto",
    "Crypto will never be mainstream",
    "So many hacks in DeFi",
    "My portfolio is red every day 🔴",
    "Bitcoin is just a bubble",
    "I can’t trust crypto anymore",
    "Crypto made me broke 💸",
    "This technology is overrated",
    "Crypto makes me nervous 😟",
    "Bearish candles everywhere 📉",
    "Feels like I wasted years in crypto",
    "So angry at these scam projects",
    "Market crash ruined my week",
    "I’m done with trading forever",
    "Crypto only benefits whales 🐋",
    "Every altcoin is a scam",
    "I regret listening to influencers",
    "Nothing works as promised",
    "Crypto hype is fake",
    "This project is going nowhere",
    "Lost all my savings to Bitcoin",
    "Crypto is destroying my sleep schedule",
    "No more trust in this market",
    "Crypto is just for gamblers",
    "I can’t recover my losses",
    "Every dip feels endless",
    "Crypto feels hopeless",
    "Too many scams in this industry",
    "I feel cheated by this project",
    "Bear market is painful",
    "Crypto market is ugly right now",
    "I hate watching my portfolio shrink",
    "Crypto is destroying people’s lives",
    "No profits, only losses",
    "This industry is a joke",
    "Wish I sold earlier 😞",
    "Crypto charts look terrible",
    "Bearish trend is never-ending",
    "So disappointed with Bitcoin",
    "Crypto adoption is too slow",
    "Investors keep getting scammed",
    "Everything is crashing again",
    "Crypto is unsafe",
    "Lost trust in blockchain",
    "Crypto is worthless",
    "Bearish momentum everywhere",
    "Nothing but stress in this space",
    "Crypto is not the future",
    "Feeling empty after losing money",
    "Crypto hype ruined me",
    "I’ll never invest again",
    "Market sentiment is terrible",
    "So tired of all these losses",
    "Another rug pull today 😠",
    "Crypto is full of liars",
    "Bitcoin ruined my finances",
    "Bearish pressure is too strong",
    "Crypto market is toxic",
    "So sad about this market",
    "This industry is hopeless",
    "Every coin is going down 📉",
    "Crypto feels like a trap",
    "Regret ever buying BTC",
    "Crypto investing is a nightmare",
    "Market has no future",
    "Lost everything I had in crypto",
    "Crypto is all hype and no substance",
    "I’m giving up on this space",
    "Bearish cycles never end",
    "Crypto turned my dreams into nightmares"
]
# Clean & transform negative tweets
sample_clean = [clean_text(t) for t in negative_tweets]
sample_vec = vectorizer.transform(sample_clean)
predictions = model.predict(sample_vec)

# Print predictions with labels
for text, pred in zip(negative_tweets, predictions):
    sentiment = "Positive 😀 (4)" if pred == 4 else "Negative 😡 (0)"
    print(f"{sentiment} --> {text}")

Negative 😡 (0) --> Crypto crash, lost all my money 😭
Positive 😀 (4) --> Bitcoin is such a scam!
Negative 😡 (0) --> Feeling hopeless after losing my savings in crypto
Positive 😀 (4) --> Another rug pull, I’m done with altcoins 😡
Negative 😡 (0) --> ETH gas fees are ridiculous!
Negative 😡 (0) --> Crypto is nothing but gambling 🎲
Negative 😡 (0) --> Wish I never bought Bitcoin 😢
Negative 😡 (0) --> This bear market is killing me
Negative 😡 (0) --> Lost everything in the crash 💔
Positive 😀 (4) --> Trading crypto makes me so anxious
Positive 😀 (4) --> Market manipulation everywhere 😠
Positive 😀 (4) --> Scammers everywhere in this space
Negative 😡 (0) --> Bitcoin will never recover
Negative 😡 (0) --> Crypto makes me depressed
Negative 😡 (0) --> I regret investing in this ponzi
Negative 😡 (0) --> These charts look awful 📉
Negative 😡 (0) --> So tired of losing money
Positive 😀 (4) --> Every pump is followed by a huge dump
Positive 😀 (4) --> Crypto influencers are liars 🤥
Negative 😡 (0) --> I lost

In [50]:
#save the model
joblib.dump(model, 'sentiment_model.pkl')


['sentiment_model.pkl']

In [51]:
joblib.dump(vectorizer, "vectorizer.pkl")


['vectorizer.pkl']

In [56]:
#load the model
loaded_model = joblib.load('sentiment_model.pkl')

# Test the loaded model with some samples
sample_vec = vectorizer.transform(['I am so happy about this project'])

print(loaded_model.predict(sample_vec))


[4]
