In [2]:
!pip install pandas scikit-learn tensorflow

Collecting tensorflow
  Using cached tensorflow-2.20.0-cp313-cp313-win_amd64.whl.metadata (4.6 kB)
Using cached tensorflow-2.20.0-cp313-cp313-win_amd64.whl (332.0 MB)
Installing collected packages: tensorflow
Successfully installed tensorflow-2.20.0


In [3]:
import pandas
import sklearn
import tensorflow as tf

print("Pandas:", pandas.__version__)
print("Sklearn:", sklearn.__version__)
print("TensorFlow:", tf.__version__)




Pandas: 2.3.1
Sklearn: 1.7.1
TensorFlow: 2.20.0


In [5]:
import pandas as pd

df = pd.read_csv("all-data.csv", names=['label', 'text'], encoding='latin-1')
df['label'] = df['label'].str.lower().str.strip()
df.head()


Unnamed: 0,label,text
0,neutral,"According to Gran , the company has no plans t..."
1,neutral,Technopolis plans to develop in stages an area...
2,negative,The international electronic industry company ...
3,positive,With the new production plant the company woul...
4,positive,According to the company 's updated strategy f...


In [6]:
from sklearn.preprocessing import LabelEncoder

# Convert text labels to numbers
encoder = LabelEncoder()
df['label_enc'] = encoder.fit_transform(df['label'])

# Check mapping
print(dict(zip(encoder.classes_, range(len(encoder.classes_)))))


{'negative': 0, 'neutral': 1, 'positive': 2}


In [7]:
from sklearn.model_selection import train_test_split

# Split 80% train, 20% test
X_train, X_test, y_train, y_test = train_test_split(
    df['text'], df['label_enc'], test_size=0.2, random_state=42
)

print(f"Training samples: {len(X_train)}, Test samples: {len(X_test)}")


Training samples: 3876, Test samples: 970


In [8]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

vocab_size = 5000   # maximum number of words to keep
max_len = 100       # max words per sentence

tokenizer = Tokenizer(num_words=vocab_size, oov_token="<OOV>")
tokenizer.fit_on_texts(X_train)

X_train_seq = pad_sequences(tokenizer.texts_to_sequences(X_train), maxlen=max_len, padding='post')
X_test_seq = pad_sequences(tokenizer.texts_to_sequences(X_test), maxlen=max_len, padding='post')

# Check shapes
print(X_train_seq.shape, X_test_seq.shape)


(3876, 100) (970, 100)


In [17]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix

# 1️⃣ Vectorize text using TF-IDF
vectorizer = TfidfVectorizer(max_features=3000, ngram_range=(1,2), stop_words='english')
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

# 2️⃣ Train Logistic Regression
model_lr = LogisticRegression(max_iter=500)
model_lr.fit(X_train_tfidf, y_train)

# 3️⃣ Evaluate
y_pred = model_lr.predict(X_test_tfidf)
print("Classification Report:\n", classification_report(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

Classification Report:
               precision    recall  f1-score   support

           0       0.83      0.44      0.57       110
           1       0.74      0.95      0.83       571
           2       0.77      0.48      0.59       289

    accuracy                           0.75       970
   macro avg       0.78      0.62      0.66       970
weighted avg       0.76      0.75      0.73       970

Confusion Matrix:
 [[ 48  48  14]
 [  3 541  27]
 [  7 144 138]]


In [23]:
aa=str(input("ENTER FIRST SENTENCE - "))
bb=str(input("ENTER SECOND SENTENCE - "))
cc=str(input("ENTER THIRD SENTENCE - "))
sample_texts = [aa,bb,cc]

sample_features = vectorizer.transform(sample_texts)
predictions = model_lr.predict(sample_features)

for text, pred in zip(sample_texts, predictions):
    label = encoder.inverse_transform([pred])[0]
    print(f"{label.upper():<8} | {text}")

ENTER FIRST SENTENCE -  firm is in profits
ENTER SECOND SENTENCE -  firm is in loss
ENTER THIRD SENTENCE -  firm is neither loss nor profits


NEUTRAL  | firm is in profits
NEUTRAL  | firm is in loss
NEUTRAL  | firm is neither loss nor profits
