In [4]:
# Replace 'ola.json' (assigned to `file_path` further down) with your actual file path if needed
# Data handling
import json
import pandas as pd
import numpy as np

# Data visualization
import matplotlib.pyplot as plt
import seaborn as sns

# Sentiment analysis (TextBlob)
# !pip install textblob --quiet  # run this only once in a notebook cell to install
from textblob import TextBlob

# For handling the mode calculation
from statistics import mode

# (Optional) to suppress warnings in a notebook
import warnings
warnings.filterwarnings('ignore')
# --- Load the raw tweets ---
# Path of the JSON export to analyse; change it if the file lives elsewhere.
file_path = 'ola.json'
with open(file_path, 'r', encoding='utf-8') as f:
    data = json.load(f)

# Convert to DataFrame
# (presumably `data` is a list of tweet records, one row each -- verify
# against the export if the shape printed below looks wrong)
df = pd.DataFrame(data)

print("DataFrame loaded. Shape:", df.shape)
# Only the last expression of a notebook cell is rendered automatically, so a
# bare `df.head()` in the middle of the cell is silently discarded. Show it
# explicitly; `display` is provided by IPython/Jupyter without an import.
display(df.head())
# Column dtypes and non-null counts, used to decide what needs cleaning.
df.info()
# --- Clean the loaded tweet frame `df` ---

# Replace every non-list `hashtags` value (e.g. a missing entry) with an empty
# list so the column holds lists only.
df['hashtags'] = df['hashtags'].apply(lambda x: x if isinstance(x, list) else [])

# Engagement counters: treat a missing value as 0 and store as integers.
# All four counters are filled (not only `views`) because casting a column
# that still contains a missing value to int raises.
count_columns = ['likes', 'shares', 'views', 'followers']
for column in count_columns:
    df[column] = df[column].fillna(0).astype(int)

# Convert datetime if it follows 'DD-MM-YYYY HH:MM:SS'.
# errors='coerce' turns values that do not match the format into NaT instead
# of raising, so report how many rows were affected rather than hiding it.
df['datetime'] = pd.to_datetime(df['datetime'], format='%d-%m-%Y %H:%M:%S', errors='coerce')
unparsed_count = int(df['datetime'].isna().sum())
if unparsed_count:
    print(f"Warning: {unparsed_count} rows have a missing or unparseable datetime (set to NaT).")

# Drop rows with no content or tweet_id if they exist.
# Rebinding `df` (instead of inplace=True) keeps the step easy to chain.
df = df.dropna(subset=['content', 'tweet_id'])

print("Data cleaned. Shape:", df.shape)
df.head()

DataFrame loaded. Shape: (3244, 16)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3244 entries, 0 to 3243
Data columns (total 16 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   tweet_id       3244 non-null   object 
 1   content        3244 non-null   object 
 2   datetime       3244 non-null   object 
 3   likes          3244 non-null   int64  
 4   shares         3244 non-null   int64  
 5   views          2442 non-null   float64
 6   source         3244 non-null   object 
 7   isBlue         3244 non-null   bool   
 8   followers      3244 non-null   int64  
 9   hashtags       1097 non-null   object 
 10  location       3244 non-null   object 
 11  user_mentions  3244 non-null   object 
 12  media          3244 non-null   object 
 13  username       3244 non-null   object 
 14  url            3244 non-null   object 
 15  comments       3244 non-null   object 
dtypes: bool(1), float64(1), int64(3), object(11)
memory usage: 3

Unnamed: 0,tweet_id,content,datetime,likes,shares,views,source,isBlue,followers,hashtags,location,user_mentions,media,username,url,comments
0,1871889326575136870,Yet again fucking driver accepted the booking ...,2024-12-25 12:02:48,0,0,23,TWITTER,False,93,[],,"[{'name': 'Ola', 'screen_name': 'Olacabs'}]",[https://pbs.twimg.com/media/GfpJadUXkAAqoVS.jpg],desire24_,https://x.com/desire24_/status/187188932657513...,[]
1,1871076431754580058,More than 1 hour and the food is still not her...,2024-12-23 06:12:38,2,1,107,TWITTER,False,72,[olafoods],,"[{'name': 'Ola', 'screen_name': 'Olacabs'}, {'...",[https://pbs.twimg.com/media/GfdmGSrWUAA5ZTP.jpg],Karan0072Karan,https://x.com/Karan0072Karan/status/1871076431...,[{'content': 'Customer care services are alway...
2,1871827021585928588,No one is constantly as motherfucking assholes...,2024-12-25 07:55:13,0,0,15,TWITTER,False,93,[],,"[{'name': 'Ola', 'screen_name': 'Olacabs'}]",[https://pbs.twimg.com/media/GfoQv5zXIAAcjA_.jpg],desire24_,https://x.com/desire24_/status/187182702158592...,[]
3,1580646160326615041,Freelance content writers needed. Fully remote...,2022-10-13 19:46:26,396,208,0,TWITTER,False,101828,[],,[],[https://pbs.twimg.com/media/Fe-VSXbWIAAaMIe.j...,OlamideTowobola,https://x.com/OlamideTowobola/status/158064616...,"[{'content': 'FYI', 'likes': 0, 'shares': 0, '..."
4,1863291107477721485,", Are you guys even concerned about what type ...",2024-12-01 18:36:32,0,0,32,TWITTER,False,47,[],,"[{'name': 'Ola', 'screen_name': 'ola_zkzkvm'},...",[https://pbs.twimg.com/media/Gdu9YbvXkAApPHn.jpg],DWasish,https://x.com/DWasish/status/1863291107477721485,[{'content': 'This type of people going forwar...
