# Sentiment Analysis on Twitter Data
This notebook analyzes the sentiment of tweets as Positive, Negative, or Neutral.

In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report

# Load the dataset
url = 'https://raw.githubusercontent.com/dD2405/Twitter_Sentiment_Analysis/master/train.csv'
tweets = pd.read_csv(url)

# Display the first few rows
tweets.head()

In [None]:
# Preprocess the data
tweets['cleaned_text'] = tweets['tweet'].str.replace('[^a-zA-Z ]', '', regex=True).str.lower()
tweets = tweets[['cleaned_text', 'label']]
tweets.dropna(inplace=True)

# Convert text to numerical format
vectorizer = CountVectorizer(stop_words='english', max_features=5000)
X = vectorizer.fit_transform(tweets['cleaned_text'])
y = tweets['label']

In [None]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the Naive Bayes model
model = MultinomialNB()
model.fit(X_train, y_train)

# Evaluate the model
y_pred = model.predict(X_test)
print('Accuracy:', accuracy_score(y_test, y_pred))
print('\nClassification Report:\n', classification_report(y_test, y_pred))

### Insights
- The model can classify tweets with a reasonable level of accuracy.
- Further optimization and feature engineering can improve the results.