In [1]:
import numpy as np
import pandas as pd
import category_encoders as ce
import sys, os
import warnings
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
from mpl_toolkits.mplot3d import Axes3D


In [2]:
# Put the sibling ../scripts directory on the import path so the project's
# helper modules below can be imported from this notebook.
sys.path.append(os.path.abspath('../scripts'))
from file_handler import FileHandler
from df_selector import *
from df_cleaner import *
from df_visualizer import *
from app_logger import App_Logger

In [3]:
# Notebook-wide display configuration.
# Silence every warning category for the rest of the session.
warnings.simplefilter('ignore')
# Let pandas render up to 50 columns before truncating wide frames.
pd.options.display.max_columns = 50

## Reading Data

In [4]:
# Instantiate the project's FileHandler (imported from ../scripts); this
# notebook uses it to read the raw CSV and to write the cleaned CSV.
file_handler = FileHandler()

In [5]:


# Load the DoctorsET channel export into a DataFrame and preview the first
# 10 rows.
df = file_handler.read_csv("../data/DoctorsET.csv")
df.head(10)


Unnamed: 0,signature,channel_id,channel_name,msg_id,message,cleaned_message,date,msg_link,msg_from_peer,msg_from_id,views,number_replies,number_forwards,is_forward,forward_msg_from_peer_type,forward_msg_from_peer_id,forward_msg_from_peer_name,forward_msg_date,forward_msg_date_string,forward_msg_link,is_reply,reply_to_msg_id,reply_msg_link,contains_media,media_type,has_url,url,domain,url_title,url_description,document_type,document_id,document_video_duration,document_filename,poll_id,poll_question,poll_total_voters,poll_results,contact_phone_number,contact_name,contact_userid,geo_type,lat,lng,venue_id,venue_type,venue_title,venue_address,venue_provider
0,msg_iteration.0.user.DoctorsET.post.864,1102021238,DoctorsET,864,https://youtu.be/5DBoEm-8kmA?si=LDLuEecNfULJVD...,https://youtu.be/5DBoEm-8kmA?si=LDLuEecNfULJVD...,2023-12-18 17:04:02+00:00,https://t.me/DoctorsET/864,,,5757,0,3,0,,,,,,,0,,,1,MessageMediaWebPage,1,https://youtu.be/5DBoEm-8kmA?si=LDLuEecNfULJVDzL,youtu.be,·â•·àã·ä≠ ·àõ·à≠·ä¨·âµ ·åà·â†·ã´ ·àõ·äê·ãç ·àö·â∞·àù·äê·ãç/·â¢·ãù·äê·àµ ·à≤·åÄ·àù·à© ·àö·à∞·à©·âµ 3 ·àµ·àÖ·â∞·â∂·âΩ/...,#ethiopia #doctorsethiopia #habesha #ethiopian...,,,,,,,,,,,,,,,,,,,
1,msg_iteration.1.user.DoctorsET.post.863,1102021238,DoctorsET,863,·ã∂·ä≠·â∞·à≠·àµ ·ä¢·âµ·ãÆ·åµ·ã´ ·â† ·ä†·ã≤·àµ ·ä†·âÄ·à´·à®·â• ·â† ·â¥·àå·â™·ã•·äï ·çï·àÆ·åç·à´·àô·äï ·àà·àò·åÄ·àò·à≠ ·ä®...,·ã∂·ä≠·â∞·à≠·àµ ·ä¢·âµ·ãÆ·åµ·ã´ ·â† ·ä†·ã≤·àµ ·ä†·âÄ·à´·à®·â• ·â† ·â¥·àå·â™·ã•·äï ·çï·àÆ·åç·à´·àô·äï ·àà·àò·åÄ·àò·à≠ ·ä®...,2023-11-03 16:14:39+00:00,https://t.me/DoctorsET/863,,,8440,0,5,0,,,,,,,0,,,1,MessageMediaWebPage,1,https://youtu.be/gwVN5eJQpko?si=xARsSxIEdZtE91GY,youtu.be,·ä®·ä†·àú·à™·ä´ ·ãà·ã∞ ·ä¢·âµ·ãÆ·åµ·ã´ / ·åà·äï·ã≥ ·ãç·àµ·å• ·ä®·àò·â∞·äõ·âµ ·ä•·àµ·ä® 500·à∫·àÖ ·â∞·ä®·â≥·ãÆ·âΩ...,#ethiopia #ethiopianmusic #doctorsethiopia #et...,,,,,,,,,,,,,,,,,,,
2,msg_iteration.2.user.DoctorsET.post.862,1102021238,DoctorsET,862,·àû·âµ ·â†·àµ·ä≥·à≠ \n\n·àà·àç·åÜ·âª·âΩ·äï ·ã®·àù·äì·à≤·ãò·ãç ·àù·à≥·âÉ ·à≥·äì·âÄ·ãç ·ä•·ãµ·àö·ã´·â∏·ãç·äï ·ã≠·âÄ·äï...,·àû·âµ ·â†·àµ·ä≥·à≠ ·àà·àç·åÜ·âª·âΩ·äï ·ã®·àù·äì·à≤·ãò·ãç ·àù·à≥·âÉ ·à≥·äì·âÄ·ãç ·ä•·ãµ·àö·ã´·â∏·ãç·äï ·ã≠·âÄ·äï·à∞·ãç ·ã≠...,2023-10-02 16:37:39+00:00,https://t.me/DoctorsET/862,,,10894,0,56,0,,,,,,,0,,,1,MessageMediaWebPage,1,https://youtu.be/oHiSRrNF7I0?si=Absgm414YSt_kjNq,youtu.be,·àû·âµ ·â†·àµ·ä≥·à≠ !·à≥·ã´·àµ·â°·âµ ·ãà·ã∞ ·àû·âµ ·ä•·ã®·âÄ·à®·â° ·ã≠·àÜ·äï? ·àç·åÜ·âª·âΩ·äï·äï ·ä•·ã®·åé·ã≥·äï ·äê·ãç!,#ethiopia #doctorsethiopia #ethiopian \n\n\nMu...,,,,,,,,,,,,,,,,,,,
3,msg_iteration.3.user.DoctorsET.post.861,1102021238,DoctorsET,861,·ä® HIV ·ã®·â∞·çà·ãà·à∞ ·à∞·ãç ·ä†·åã·å•·àü·âΩ·àÅ ·ã´·âÉ·àç ? ·çà·ãç·àµ ·ä•·äì ·àÖ·ä≠·àù·äì ?\n\n·àô...,·ä® HIV ·ã®·â∞·çà·ãà·à∞ ·à∞·ãç ·ä†·åã·å•·àü·âΩ·àÅ ·ã´·âÉ·àç ? ·çà·ãç·àµ ·ä•·äì ·àÖ·ä≠·àù·äì ? ·àô·àâ ·â™...,2023-09-16 07:54:32+00:00,https://t.me/DoctorsET/861,,,11306,0,8,0,,,,,,,0,,,1,MessageMediaWebPage,1,https://youtu.be/tTeErZxIh_Q?si=jKHyfWcC3sfXbC8L,youtu.be,·ä® HIV ·ã®·â∞·çà·ãà·à∞ ·à∞·ãç ·ä†·åã·å•·àû·âΩ·àÅ ·ã´·âÉ·àç ? ·çà·ãç·àµ ·ä•·äì ·àÖ·ä≠·àù·äì,#doctorsethiopia #ethiopia #ethiopian #habesha,,,,,,,,,,,,,,,,,,,
4,msg_iteration.4.user.DoctorsET.post.860,1102021238,DoctorsET,860,·â†·âÖ·à≠·â• ·åä·ãú ·â†·àÉ·åà·à´·âΩ·äï ·àã·ã≠ ·ä•·ã®·â∞·àµ·â∞·ãã·àà ·ã´·àà ·ã®·â∞·àò·à≥·à≥·ã≠ ·çÜ·â≥ ( Homos...,·â†·âÖ·à≠·â• ·åä·ãú ·â†·àÉ·åà·à´·âΩ·äï ·àã·ã≠ ·ä•·ã®·â∞·àµ·â∞·ãã·àà ·ã´·àà ·ã®·â∞·àò·à≥·à≥·ã≠ ·çÜ·â≥ ( Homos...,2023-09-01 16:16:15+00:00,https://t.me/DoctorsET/860,,,13112,0,11,0,,,,,,,0,,,1,MessageMediaWebPage,1,https://youtu.be/0k65P5ouw7s?si=qaUgo75bUa3AMQxD,youtu.be,40%·âµ·ãç·àç·ã± ·ãà·ã∞ Homosexuality ·ä•·ã®·â∞·âÄ·ã®·à® ·äê·ãç?? ·âÄ·ãµ·àû ·ä•·äï·ã¥·âµ ...,,,,,,,,,,,,,,,,,,,,
5,msg_iteration.5.user.DoctorsET.post.859,1102021238,DoctorsET,859,üëáüëáüëáüëáüëáüëá https://youtu.be/-AR1KO2DbFw?si=47cXLZt...,üëáüëáüëáüëáüëáüëá https://youtu.be/-AR1KO2DbFw?si=47cXLZt...,2023-08-29 17:20:05+00:00,https://t.me/DoctorsET/859,,,11703,0,5,0,,,,,,,0,,,1,MessageMediaWebPage,1,https://youtu.be/-AR1KO2DbFw?si=47cXLZtlmhx1Nlqc,youtu.be,·ã®·à¥·â∂·âΩ·äï sex ·çç·àã·åé·âµ ·àõ·ãà·âÖ ·ã´·àà·â•·äï ·äê·åà·àÆ·âΩ / ·ã®·à¥·â∂·âΩ ·ã® sex ·çç·àã·åé·âµ...,#ethiopia #doctorsethiopia #Habesha,,,,,,,,,,,,,,,,,,,
6,msg_iteration.6.user.DoctorsET.post.848,1102021238,DoctorsET,848,·ä≠·à®·àù·â±·äï ·àµ·çñ·à≠·âµ ·àò·àµ·à´·âµ ·ä†·àµ·â†·ãç ·åÇ·àù ·àà·àò·åç·â£·âµ ·ä´·àç·âª·àâ ·â£·àâ·â†·âµ ·â¶·â≥ ·àÜ·äê·ãç...,·ä≠·à®·àù·â±·äï ·àµ·çñ·à≠·âµ ·àò·àµ·à´·âµ ·ä†·àµ·â†·ãç ·åÇ·àù ·àà·àò·åç·â£·âµ ·ä´·àç·âª·àâ ·â£·àâ·â†·âµ ·â¶·â≥ ·àÜ·äê·ãç...,2022-08-02 17:42:08+00:00,https://t.me/DoctorsET/848,,,34738,0,66,0,,,,,,,0,,,1,MessageMediaWebPage,1,https://youtu.be/0uiTzjEbh90,youtu.be,·â†·â§·âµ·ãé ·àÜ·äê·ãç ·ä•·äê·ãö·àÖ·äï ·ä•·äï·âÖ·àµ·âÉ·à¥·ãé·âΩ ·â†·àò·àµ·à´·âµ ·ã®·åâ·äï ·â¶·à≠·åÆ·äï ·â†·âÄ·àã·àâ ·ã´·å•...,#doctorsethiopia #ethiopia #·çã·äì,,,,,,,,,,,,,,,,,,,
7,msg_iteration.7.user.DoctorsET.post.847,1102021238,DoctorsET,847,·àµ·çñ·à≠·âµ ·ã®·àò·àµ·à´·âµ ·à±·àµ ·ã≠·äñ·à≠ ·ã≠·àÜ·äï?\n\n·â†·ä†·àÅ·äë ·ãà·âÖ·âµ ·â•·ãô ·ã®·àµ·çñ·à≠·âµ ·àò·àµ...,·àµ·çñ·à≠·âµ ·ã®·àò·àµ·à´·âµ ·à±·àµ ·ã≠·äñ·à≠ ·ã≠·àÜ·äï? ·â†·ä†·àÅ·äë ·ãà·âÖ·âµ ·â•·ãô ·ã®·àµ·çñ·à≠·âµ ·àò·àµ·à™·ã´ ...,2022-06-12 17:15:47+00:00,https://t.me/DoctorsET/847,,,31942,0,24,0,,,,,,,0,,,1,MessageMediaWebPage,1,https://youtu.be/WPlRuRtQXN8,youtu.be,·ã®·àµ·çñ·à≠·âµ ·à±·àµ /sport Addiction / ·ã®·àµ·çñ·à≠·âµ ·ã®·àò·àµ·à´·âµ ·à±·àµ ·àù·àç·ä≠...,#doctorsethiopia #ethiopia #addiction Pulse Fi...,,,,,,,,,,,,,,,,,,,
8,msg_iteration.8.user.DoctorsET.post.846,1102021238,DoctorsET,846,·ãµ·äï·åà·â∞·äõ ·ä†·ã∞·åã / ·ã®·ä†·å•·äï·âµ ·àµ·â•·à´·âµ\n\n·ä†·ã´·à≠·åà·ãç·äì ·ãµ·äï·åà·â∞·äõ ·ã®·àÜ·äê ·ä†·ã∞·åã...,·ãµ·äï·åà·â∞·äõ ·ä†·ã∞·åã / ·ã®·ä†·å•·äï·âµ ·àµ·â•·à´·âµ ·ä†·ã´·à≠·åà·ãç·äì ·ãµ·äï·åà·â∞·äõ ·ã®·àÜ·äê ·ä†·ã∞·åã ·â¢·ã∞...,2022-05-31 17:51:13+00:00,https://t.me/DoctorsET/846,,,26678,0,10,0,,,,,,,0,,,1,MessageMediaWebPage,1,https://youtu.be/QI-8oqW80uI,youtu.be,·ãµ·äï·åà·â∞·äõ ·ä†·ã∞·åã ·â¢·ã∞·à≠·àµ·â¶ ·ã≠·àÖ·äï 6 ·äê·åà·àÆ·âΩ ·â†·àò·â∞·åç·â†·à≠ ·àÑ·ãà·â∂·äï ·ã´·âµ·à≠·çâ! ·ãµ...,#doctorsethiopia #ethiopia #medical,,,,,,,,,,,,,,,,,,,
9,msg_iteration.9.user.DoctorsET.post.845,1102021238,DoctorsET,845,·ä®·âµ·äï·àΩ ·åç·ãö·ã´·âµ ·â†·çä·âµ ·àµ·çñ·à≠·âµ ·àò·àµ·à´·âµ ·ä•·äï·ã∞ ·âÖ·äï·å¶·âµ ·ã≠·â≥·ã≠ ·äê·â†·à≠ ·ä†·àÅ·äï ·àã...,·ä®·âµ·äï·àΩ ·åç·ãö·ã´·âµ ·â†·çä·âµ ·àµ·çñ·à≠·âµ ·àò·àµ·à´·âµ ·ä•·äï·ã∞ ·âÖ·äï·å¶·âµ ·ã≠·â≥·ã≠ ·äê·â†·à≠ ·ä†·àÅ·äï ·àã...,2022-05-20 18:04:53+00:00,https://t.me/DoctorsET/845,,,23190,0,35,0,,,,,,,0,,,1,MessageMediaWebPage,1,https://youtu.be/_IEWt07bECg,youtu.be,·àµ·çñ·à≠·âµ ·à∞·à≠·â∞·ãç ·àà·ãç·å• ·àõ·àù·å£·âµ ·ä´·àç·âª·àâ ·ã≠·àÖ·äï 3 ·äê·åà·àÆ·âΩ ·àò·â∞·åç·â†·à≠ ·ã≠·åÄ·àù·à©!...,#DoctorsEthiopia #FanaTV #Ethiopia,,,,,,,,,,,,,,,,,,,


# General Statistics

In [6]:
# Total number of cells in the frame (rows x columns).
df.size

37044

In [7]:
# (rows, columns) of the raw frame.
df.shape

(756, 49)

In [8]:
# Per-column non-null counts and dtypes of the raw frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 756 entries, 0 to 755
Data columns (total 49 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   signature                   756 non-null    object 
 1   channel_id                  756 non-null    int64  
 2   channel_name                756 non-null    object 
 3   msg_id                      756 non-null    int64  
 4   message                     605 non-null    object 
 5   cleaned_message             605 non-null    object 
 6   date                        756 non-null    object 
 7   msg_link                    756 non-null    object 
 8   msg_from_peer               0 non-null      float64
 9   msg_from_id                 0 non-null      float64
 10  views                       756 non-null    int64  
 11  number_replies              756 non-null    int64  
 12  number_forwards             756 non-null    int64  
 13  is_forward                  756 non

# Missing Values

In [9]:
# Print the overall percentage of missing cells in the frame
# (project helper, star-imported from ../scripts).
percent_missing_values(df)

The dataset contains 66.39 % missing values.


In [10]:
# Build the per-column missing-value summary table; the helper also prints
# how many columns contain missing values.
missing_df = missing_values_table(df)

Your selected dataframe has 49 columns.
There are 36 columns that have missing values.


In [11]:
# Discard every column that holds no data at all (all values missing).
df = df.dropna(axis='columns', how='all')

In [12]:
# (rows, columns) after the all-empty columns were dropped.
df.shape

(756, 30)

In [13]:
# Re-inspect non-null counts and dtypes for the remaining columns.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 756 entries, 0 to 755
Data columns (total 30 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   signature                   756 non-null    object 
 1   channel_id                  756 non-null    int64  
 2   channel_name                756 non-null    object 
 3   msg_id                      756 non-null    int64  
 4   message                     605 non-null    object 
 5   cleaned_message             605 non-null    object 
 6   date                        756 non-null    object 
 7   msg_link                    756 non-null    object 
 8   views                       756 non-null    int64  
 9   number_replies              756 non-null    int64  
 10  number_forwards             756 non-null    int64  
 11  is_forward                  756 non-null    int64  
 12  forward_msg_from_peer_type  7 non-null      object 
 13  forward_msg_from_peer_id    7 non-n

In [14]:
# Display the missing-value summary table.
# NOTE(review): missing_df was built before the all-empty columns were dropped
# from df, so it still lists those columns — rebuild it if a current view is wanted.
missing_df

Unnamed: 0,Missing Values,% of Total Values,Dtype
venue_provider,756,100.0,float64
contact_userid,756,100.0,float64
contact_phone_number,756,100.0,float64
poll_results,756,100.0,float64
poll_total_voters,756,100.0,float64
poll_question,756,100.0,float64
poll_id,756,100.0,float64
geo_type,756,100.0,float64
lat,756,100.0,float64
lng,756,100.0,float64


In [15]:
# Report how many rows have at least one missing value in any column.
count_missing_rows(df)

756 rows(100.0%) contain atleast one missing value.


In [17]:
# Number of rows missing a value in at least one of the "group 1" columns
# (the forward link and the forwarded-from peer name).
count_missing_rows(df[['forward_msg_link','forward_msg_from_peer_name']])

750 rows(99.21%) contain atleast one missing value.


In [18]:
# Keep only the rows whose forward link is missing (the "group 1" rows)
# and preview the first 10 of them.
group1_df = df.loc[df['forward_msg_link'].isna()]
group1_df.head(10)

Unnamed: 0,signature,channel_id,channel_name,msg_id,message,cleaned_message,date,msg_link,views,number_replies,number_forwards,is_forward,forward_msg_from_peer_type,forward_msg_from_peer_id,forward_msg_from_peer_name,forward_msg_date,forward_msg_date_string,forward_msg_link,is_reply,contains_media,media_type,has_url,url,domain,url_title,url_description,document_type,document_id,document_video_duration,document_filename
0,msg_iteration.0.user.DoctorsET.post.864,1102021238,DoctorsET,864,https://youtu.be/5DBoEm-8kmA?si=LDLuEecNfULJVD...,https://youtu.be/5DBoEm-8kmA?si=LDLuEecNfULJVD...,2023-12-18 17:04:02+00:00,https://t.me/DoctorsET/864,5757,0,3,0,,,,,,,0,1,MessageMediaWebPage,1,https://youtu.be/5DBoEm-8kmA?si=LDLuEecNfULJVDzL,youtu.be,·â•·àã·ä≠ ·àõ·à≠·ä¨·âµ ·åà·â†·ã´ ·àõ·äê·ãç ·àö·â∞·àù·äê·ãç/·â¢·ãù·äê·àµ ·à≤·åÄ·àù·à© ·àö·à∞·à©·âµ 3 ·àµ·àÖ·â∞·â∂·âΩ/...,#ethiopia #doctorsethiopia #habesha #ethiopian...,,,,
1,msg_iteration.1.user.DoctorsET.post.863,1102021238,DoctorsET,863,·ã∂·ä≠·â∞·à≠·àµ ·ä¢·âµ·ãÆ·åµ·ã´ ·â† ·ä†·ã≤·àµ ·ä†·âÄ·à´·à®·â• ·â† ·â¥·àå·â™·ã•·äï ·çï·àÆ·åç·à´·àô·äï ·àà·àò·åÄ·àò·à≠ ·ä®...,·ã∂·ä≠·â∞·à≠·àµ ·ä¢·âµ·ãÆ·åµ·ã´ ·â† ·ä†·ã≤·àµ ·ä†·âÄ·à´·à®·â• ·â† ·â¥·àå·â™·ã•·äï ·çï·àÆ·åç·à´·àô·äï ·àà·àò·åÄ·àò·à≠ ·ä®...,2023-11-03 16:14:39+00:00,https://t.me/DoctorsET/863,8440,0,5,0,,,,,,,0,1,MessageMediaWebPage,1,https://youtu.be/gwVN5eJQpko?si=xARsSxIEdZtE91GY,youtu.be,·ä®·ä†·àú·à™·ä´ ·ãà·ã∞ ·ä¢·âµ·ãÆ·åµ·ã´ / ·åà·äï·ã≥ ·ãç·àµ·å• ·ä®·àò·â∞·äõ·âµ ·ä•·àµ·ä® 500·à∫·àÖ ·â∞·ä®·â≥·ãÆ·âΩ...,#ethiopia #ethiopianmusic #doctorsethiopia #et...,,,,
2,msg_iteration.2.user.DoctorsET.post.862,1102021238,DoctorsET,862,·àû·âµ ·â†·àµ·ä≥·à≠ \n\n·àà·àç·åÜ·âª·âΩ·äï ·ã®·àù·äì·à≤·ãò·ãç ·àù·à≥·âÉ ·à≥·äì·âÄ·ãç ·ä•·ãµ·àö·ã´·â∏·ãç·äï ·ã≠·âÄ·äï...,·àû·âµ ·â†·àµ·ä≥·à≠ ·àà·àç·åÜ·âª·âΩ·äï ·ã®·àù·äì·à≤·ãò·ãç ·àù·à≥·âÉ ·à≥·äì·âÄ·ãç ·ä•·ãµ·àö·ã´·â∏·ãç·äï ·ã≠·âÄ·äï·à∞·ãç ·ã≠...,2023-10-02 16:37:39+00:00,https://t.me/DoctorsET/862,10894,0,56,0,,,,,,,0,1,MessageMediaWebPage,1,https://youtu.be/oHiSRrNF7I0?si=Absgm414YSt_kjNq,youtu.be,·àû·âµ ·â†·àµ·ä≥·à≠ !·à≥·ã´·àµ·â°·âµ ·ãà·ã∞ ·àû·âµ ·ä•·ã®·âÄ·à®·â° ·ã≠·àÜ·äï? ·àç·åÜ·âª·âΩ·äï·äï ·ä•·ã®·åé·ã≥·äï ·äê·ãç!,#ethiopia #doctorsethiopia #ethiopian \n\n\nMu...,,,,
3,msg_iteration.3.user.DoctorsET.post.861,1102021238,DoctorsET,861,·ä® HIV ·ã®·â∞·çà·ãà·à∞ ·à∞·ãç ·ä†·åã·å•·àü·âΩ·àÅ ·ã´·âÉ·àç ? ·çà·ãç·àµ ·ä•·äì ·àÖ·ä≠·àù·äì ?\n\n·àô...,·ä® HIV ·ã®·â∞·çà·ãà·à∞ ·à∞·ãç ·ä†·åã·å•·àü·âΩ·àÅ ·ã´·âÉ·àç ? ·çà·ãç·àµ ·ä•·äì ·àÖ·ä≠·àù·äì ? ·àô·àâ ·â™...,2023-09-16 07:54:32+00:00,https://t.me/DoctorsET/861,11306,0,8,0,,,,,,,0,1,MessageMediaWebPage,1,https://youtu.be/tTeErZxIh_Q?si=jKHyfWcC3sfXbC8L,youtu.be,·ä® HIV ·ã®·â∞·çà·ãà·à∞ ·à∞·ãç ·ä†·åã·å•·àû·âΩ·àÅ ·ã´·âÉ·àç ? ·çà·ãç·àµ ·ä•·äì ·àÖ·ä≠·àù·äì,#doctorsethiopia #ethiopia #ethiopian #habesha,,,,
4,msg_iteration.4.user.DoctorsET.post.860,1102021238,DoctorsET,860,·â†·âÖ·à≠·â• ·åä·ãú ·â†·àÉ·åà·à´·âΩ·äï ·àã·ã≠ ·ä•·ã®·â∞·àµ·â∞·ãã·àà ·ã´·àà ·ã®·â∞·àò·à≥·à≥·ã≠ ·çÜ·â≥ ( Homos...,·â†·âÖ·à≠·â• ·åä·ãú ·â†·àÉ·åà·à´·âΩ·äï ·àã·ã≠ ·ä•·ã®·â∞·àµ·â∞·ãã·àà ·ã´·àà ·ã®·â∞·àò·à≥·à≥·ã≠ ·çÜ·â≥ ( Homos...,2023-09-01 16:16:15+00:00,https://t.me/DoctorsET/860,13112,0,11,0,,,,,,,0,1,MessageMediaWebPage,1,https://youtu.be/0k65P5ouw7s?si=qaUgo75bUa3AMQxD,youtu.be,40%·âµ·ãç·àç·ã± ·ãà·ã∞ Homosexuality ·ä•·ã®·â∞·âÄ·ã®·à® ·äê·ãç?? ·âÄ·ãµ·àû ·ä•·äï·ã¥·âµ ...,,,,,
5,msg_iteration.5.user.DoctorsET.post.859,1102021238,DoctorsET,859,üëáüëáüëáüëáüëáüëá https://youtu.be/-AR1KO2DbFw?si=47cXLZt...,üëáüëáüëáüëáüëáüëá https://youtu.be/-AR1KO2DbFw?si=47cXLZt...,2023-08-29 17:20:05+00:00,https://t.me/DoctorsET/859,11703,0,5,0,,,,,,,0,1,MessageMediaWebPage,1,https://youtu.be/-AR1KO2DbFw?si=47cXLZtlmhx1Nlqc,youtu.be,·ã®·à¥·â∂·âΩ·äï sex ·çç·àã·åé·âµ ·àõ·ãà·âÖ ·ã´·àà·â•·äï ·äê·åà·àÆ·âΩ / ·ã®·à¥·â∂·âΩ ·ã® sex ·çç·àã·åé·âµ...,#ethiopia #doctorsethiopia #Habesha,,,,
6,msg_iteration.6.user.DoctorsET.post.848,1102021238,DoctorsET,848,·ä≠·à®·àù·â±·äï ·àµ·çñ·à≠·âµ ·àò·àµ·à´·âµ ·ä†·àµ·â†·ãç ·åÇ·àù ·àà·àò·åç·â£·âµ ·ä´·àç·âª·àâ ·â£·àâ·â†·âµ ·â¶·â≥ ·àÜ·äê·ãç...,·ä≠·à®·àù·â±·äï ·àµ·çñ·à≠·âµ ·àò·àµ·à´·âµ ·ä†·àµ·â†·ãç ·åÇ·àù ·àà·àò·åç·â£·âµ ·ä´·àç·âª·àâ ·â£·àâ·â†·âµ ·â¶·â≥ ·àÜ·äê·ãç...,2022-08-02 17:42:08+00:00,https://t.me/DoctorsET/848,34738,0,66,0,,,,,,,0,1,MessageMediaWebPage,1,https://youtu.be/0uiTzjEbh90,youtu.be,·â†·â§·âµ·ãé ·àÜ·äê·ãç ·ä•·äê·ãö·àÖ·äï ·ä•·äï·âÖ·àµ·âÉ·à¥·ãé·âΩ ·â†·àò·àµ·à´·âµ ·ã®·åâ·äï ·â¶·à≠·åÆ·äï ·â†·âÄ·àã·àâ ·ã´·å•...,#doctorsethiopia #ethiopia #·çã·äì,,,,
7,msg_iteration.7.user.DoctorsET.post.847,1102021238,DoctorsET,847,·àµ·çñ·à≠·âµ ·ã®·àò·àµ·à´·âµ ·à±·àµ ·ã≠·äñ·à≠ ·ã≠·àÜ·äï?\n\n·â†·ä†·àÅ·äë ·ãà·âÖ·âµ ·â•·ãô ·ã®·àµ·çñ·à≠·âµ ·àò·àµ...,·àµ·çñ·à≠·âµ ·ã®·àò·àµ·à´·âµ ·à±·àµ ·ã≠·äñ·à≠ ·ã≠·àÜ·äï? ·â†·ä†·àÅ·äë ·ãà·âÖ·âµ ·â•·ãô ·ã®·àµ·çñ·à≠·âµ ·àò·àµ·à™·ã´ ...,2022-06-12 17:15:47+00:00,https://t.me/DoctorsET/847,31942,0,24,0,,,,,,,0,1,MessageMediaWebPage,1,https://youtu.be/WPlRuRtQXN8,youtu.be,·ã®·àµ·çñ·à≠·âµ ·à±·àµ /sport Addiction / ·ã®·àµ·çñ·à≠·âµ ·ã®·àò·àµ·à´·âµ ·à±·àµ ·àù·àç·ä≠...,#doctorsethiopia #ethiopia #addiction Pulse Fi...,,,,
8,msg_iteration.8.user.DoctorsET.post.846,1102021238,DoctorsET,846,·ãµ·äï·åà·â∞·äõ ·ä†·ã∞·åã / ·ã®·ä†·å•·äï·âµ ·àµ·â•·à´·âµ\n\n·ä†·ã´·à≠·åà·ãç·äì ·ãµ·äï·åà·â∞·äõ ·ã®·àÜ·äê ·ä†·ã∞·åã...,·ãµ·äï·åà·â∞·äõ ·ä†·ã∞·åã / ·ã®·ä†·å•·äï·âµ ·àµ·â•·à´·âµ ·ä†·ã´·à≠·åà·ãç·äì ·ãµ·äï·åà·â∞·äõ ·ã®·àÜ·äê ·ä†·ã∞·åã ·â¢·ã∞...,2022-05-31 17:51:13+00:00,https://t.me/DoctorsET/846,26678,0,10,0,,,,,,,0,1,MessageMediaWebPage,1,https://youtu.be/QI-8oqW80uI,youtu.be,·ãµ·äï·åà·â∞·äõ ·ä†·ã∞·åã ·â¢·ã∞·à≠·àµ·â¶ ·ã≠·àÖ·äï 6 ·äê·åà·àÆ·âΩ ·â†·àò·â∞·åç·â†·à≠ ·àÑ·ãà·â∂·äï ·ã´·âµ·à≠·çâ! ·ãµ...,#doctorsethiopia #ethiopia #medical,,,,
9,msg_iteration.9.user.DoctorsET.post.845,1102021238,DoctorsET,845,·ä®·âµ·äï·àΩ ·åç·ãö·ã´·âµ ·â†·çä·âµ ·àµ·çñ·à≠·âµ ·àò·àµ·à´·âµ ·ä•·äï·ã∞ ·âÖ·äï·å¶·âµ ·ã≠·â≥·ã≠ ·äê·â†·à≠ ·ä†·àÅ·äï ·àã...,·ä®·âµ·äï·àΩ ·åç·ãö·ã´·âµ ·â†·çä·âµ ·àµ·çñ·à≠·âµ ·àò·àµ·à´·âµ ·ä•·äï·ã∞ ·âÖ·äï·å¶·âµ ·ã≠·â≥·ã≠ ·äê·â†·à≠ ·ä†·àÅ·äï ·àã...,2022-05-20 18:04:53+00:00,https://t.me/DoctorsET/845,23190,0,35,0,,,,,,,0,1,MessageMediaWebPage,1,https://youtu.be/_IEWt07bECg,youtu.be,·àµ·çñ·à≠·âµ ·à∞·à≠·â∞·ãç ·àà·ãç·å• ·àõ·àù·å£·âµ ·ä´·àç·âª·àâ ·ã≠·àÖ·äï 3 ·äê·åà·àÆ·âΩ ·àò·â∞·åç·â†·à≠ ·ã≠·åÄ·àù·à©!...,#DoctorsEthiopia #FanaTV #Ethiopia,,,,


In [19]:
# Tabulate the unique values found in each column of group1_df.
unique_values_df(group1_df)

Unnamed: 0,Column,Unique values
0,signature,"[msg_iteration.0.user.DoctorsET.post.864, msg_..."
1,channel_id,[1102021238]
2,channel_name,[DoctorsET]
3,msg_id,"[864, 284, 294, 292, 291, 290, 289, 288, 287, ..."
4,message,"[#Dailytips \n#DOCTORSETHIOPIA, ·ä•·àµ·ä® ·âµ·àã·äï·âµ ·ãµ·à®·àµ ·ã®..."
5,cleaned_message,"[#Dailytips #DOCTORSETHIOPIA, ·ä•·àµ·ä® ·âµ·àã·äï·âµ ·ãµ·à®·àµ ·ã® ·â´..."
6,date,"[2022-02-04 18:22:07+00:00, 2022-02-25 17:46:0..."
7,msg_link,"[https://t.me/DoctorsET/864, https://t.me/Doct..."
8,views,"[1382, 1430, 3485, 2377, 2379, 2725, 1410, 270..."
9,number_replies,[0]


In [20]:
# Frequency of each reply count among the rows without a forward link.
group1_df['number_replies'].value_counts()

number_replies
0    750
Name: count, dtype: int64

In [21]:
# Frequency of each reply count over the whole frame.
df['number_replies'].value_counts()

number_replies
0    756
Name: count, dtype: int64

In [23]:
# Shape of the subset of rows that received no replies.
df.loc[df['number_replies'].eq(0)].shape

(756, 30)

In [24]:
# Shape of the subset of rows whose is_reply flag is 0.
df.loc[df['is_reply'].eq(0)].shape

(756, 30)

In [26]:
# Replace missing values in the two reply columns with 0 (project helper).
# NOTE(review): the return value is not captured, so the helper presumably
# mutates df in place — confirm.
fix_missing_value(df, ['number_replies', 'is_reply'], 0)

0 missing values in the column number_replies have been replaced by 0.
0 missing values in the column is_reply have been replaced by 0.


In [36]:
# Replace missing values in the two message-text columns with 0 (project helper).
# NOTE(review): the fill value is the integer 0 in columns that otherwise hold
# text, which leaves them with mixed types; consider a text placeholder such as
# an empty string instead — confirm what downstream consumers expect.
fix_missing_value(df, ['message', 'cleaned_message'], 0)

151 missing values in the column message have been replaced by 0.
151 missing values in the column cleaned_message have been replaced by 0.


In [35]:
# Spot-check 10 random rows. The sample is unseeded, so the rows differ on
# every run.
# NOTE(review): the saved output shows several other columns already filled
# with 0 although no visible cell fills them, and the execution counts around
# this cell are out of order — likely hidden state from a deleted cell; confirm
# the notebook reproduces under Restart Kernel -> Run All.
df.sample(10)

Unnamed: 0,signature,channel_id,channel_name,msg_id,message,cleaned_message,date,msg_link,views,number_replies,number_forwards,is_forward,forward_msg_from_peer_type,forward_msg_from_peer_id,forward_msg_from_peer_name,forward_msg_date,forward_msg_date_string,forward_msg_link,is_reply,contains_media,media_type,has_url,url,domain,url_title,url_description,document_type,document_id,document_video_duration,document_filename
162,msg_iteration.164.user.DoctorsET.post.682,1102021238,DoctorsET,682,·ä≠·çç·àç 2 ·â∞·àà·âÄ·âÄ\n\n·âÄ·àã·àç·äì ·ä™·àµ ·ã®·àõ·ã≠·åé·ã± ·å§·äì·àõ ·ã®·ä†·àò·åã·åà·â• ·àò·äï·åà·ã∂·âΩ\n...,·ä≠·çç·àç 2 ·â∞·àà·âÄ·âÄ ·âÄ·àã·àç·äì ·ä™·àµ ·ã®·àõ·ã≠·åé·ã± ·å§·äì·àõ ·ã®·ä†·àò·åã·åà·â• ·àò·äï·åà·ã∂·âΩ ·ä¨·â∂·åÑ·äí...,2021-06-19 16:24:09+00:00,https://t.me/DoctorsET/682,13937,0,28,0,0,0.0,0,0,0,0,0,1,MessageMediaWebPage,1,https://youtu.be/mATGorxkJSY,youtu.be,·ä≠·çç·àç 2 ·ä†·äê·ãö·àÖ·äï ·àù·åç·â¶·âΩ ·â†·âÄ·àã·àâ ·â†·àò·àò·åà·â• ·å§·äì·ãé·äï ·ã≠·å†·â•·âÅ! ·ãç·çç·à®·âµ ·â† ...,#DoctorsEthiopia #MebaEntertainmnet #FanaTelev...,0,0.0,0.0,0
518,msg_iteration.553.user.DoctorsET.post.270,1102021238,DoctorsET,270,,,2020-01-28 14:42:42+00:00,https://t.me/DoctorsET/270,2407,0,1,0,0,0.0,0,0,0,0,0,1,MessageMediaPhoto,0,0,0,0,0,0,0.0,0.0,0
27,msg_iteration.27.user.DoctorsET.post.825,1102021238,DoctorsET,825,,,2022-04-22 17:51:04+00:00,https://t.me/DoctorsET/825,11744,0,13,0,0,0.0,0,0,0,0,0,1,MessageMediaPhoto,0,0,0,0,0,0,0.0,0.0,0
101,msg_iteration.102.user.DoctorsET.post.747,1102021238,DoctorsET,747,,,2022-02-04 18:22:07+00:00,https://t.me/DoctorsET/747,8792,0,5,0,0,0.0,0,0,0,0,0,1,MessageMediaPhoto,0,0,0,0,0,0,0.0,0.0,0
322,msg_iteration.336.user.DoctorsET.post.503,1102021238,DoctorsET,503,·ã≠·àÑ·äï ·ä•·äï·ã≥·ãÆ ·ãç·àÉ ·ã≠·å†·å°\n ·ã∂·ä≠·â∞·à≠·àµ ·ä¢·âµ·ãÆ·åµ·ã´,·ã≠·àÑ·äï ·ä•·äï·ã≥·ãÆ ·ãç·àÉ ·ã≠·å†·å° ·ã∂·ä≠·â∞·à≠·àµ ·ä¢·âµ·ãÆ·åµ·ã´,2020-09-20 07:35:26+00:00,https://t.me/DoctorsET/503,5265,0,21,0,0,0.0,0,0,0,0,0,1,MessageMediaPhoto,0,0,0,0,0,0,0.0,0.0,0
689,msg_iteration.734.user.DoctorsET.post.75,1102021238,DoctorsET,75,,,2019-10-23 10:57:28+00:00,https://t.me/DoctorsET/75,1298,0,1,0,0,0.0,0,0,0,0,0,1,MessageMediaPhoto,0,0,0,0,0,0,0.0,0.0,0
182,msg_iteration.184.user.DoctorsET.post.662,1102021238,DoctorsET,662,·à¥·âæ·âΩ ·ä•·äì ·ãï·ãµ·àú\n·ã®·à∞·ãç ·àç·åÖ ·ä®·ä†·àò·âµ ·ãà·ã∞ ·ä†·àò·âµ ·ãï·ãµ·àú ·ä•·ã®·å®·àò·à® ·ã≠·àÑ·ã≥·àç ...,·à¥·âæ·âΩ ·ä•·äì ·ãï·ãµ·àú ·ã®·à∞·ãç ·àç·åÖ ·ä®·ä†·àò·âµ ·ãà·ã∞ ·ä†·àò·âµ ·ãï·ãµ·àú ·ä•·ã®·å®·àò·à® ·ã≠·àÑ·ã≥·àç ·ã≠...,2021-05-15 17:13:50+00:00,https://t.me/DoctorsET/662,12777,0,19,0,0,0.0,0,0,0,0,0,1,MessageMediaWebPage,1,https://youtu.be/Reiw_VwqJOo,youtu.be,·à¥·â∂·âΩ ·ä•·äì ·ä•·ãµ·àú // ·ä•·ãµ·àú ·à≤·å®·àù·à≠ ·àò·å†·äï·âÄ·âÖ ·ã´·àà·â•·äï ·äê·åà·àÆ·âΩ//,#DoctorsEthiopia #FanaTelevision #MebaEntertai...,0,0.0,0.0,0
181,msg_iteration.183.user.DoctorsET.post.663,1102021238,DoctorsET,663,·ã®·äÆ·àÆ·äì ·ä≠·âµ·â£·âµ ·ãà·à∞·ãµ·äïüëáüëá\nhttps://youtu.be/vYRhvkbtt0Q,·ã®·äÆ·àÆ·äì ·ä≠·âµ·â£·âµ ·ãà·à∞·ãµ·äïüëáüëá https://youtu.be/vYRhvkbtt0Q,2021-05-16 18:33:39+00:00,https://t.me/DoctorsET/663,12214,0,8,0,0,0.0,0,0,0,0,0,1,MessageMediaWebPage,1,https://youtu.be/vYRhvkbtt0Q,youtu.be,·ã®·äê·à≠·â≠ ·àÖ·àò·àù /·ã®·äÆ·àÆ·äì ·ä≠·âµ·â£·âµ ·ãà·àµ·ã∞·äì·àç/ ·ä®·âµ·â£·âµ ·ä®·àò·ãç·à∞·ã≥·âΩ·äï ·â†·çä·âµ ·àõ·ãà...,#DoctorsEthiopia #FanaTelevision #MebaEntertai...,0,0.0,0.0,0
662,msg_iteration.706.user.DoctorsET.post.109,1102021238,DoctorsET,109,,,2019-11-04 16:54:34+00:00,https://t.me/DoctorsET/109,1779,0,2,0,0,0.0,0,0,0,0,0,1,MessageMediaPhoto,0,0,0,0,0,0,0.0,0.0,0
630,msg_iteration.674.user.DoctorsET.post.143,1102021238,DoctorsET,143,·ä†·àÅ·äï ·â†·ã∂·ä≠·â∞·à≠·àµ ·ä¢·âµ·ãÆ·åµ·ã´ \n·àõ·â≥ ·àã·ã≠ ·àò·â•·à´·âµ ·ä†·å•·çç·â∂ ·àµ·àç·äÆ·âΩ·äï ·àã·çï·â∂·çï ...,·ä†·àÅ·äï ·â†·ã∂·ä≠·â∞·à≠·àµ ·ä¢·âµ·ãÆ·åµ·ã´ ·àõ·â≥ ·àã·ã≠ ·àò·â•·à´·âµ ·ä†·å•·çç·â∂ ·àµ·àç·äÆ·âΩ·äï ·àã·çï·â∂·çï ·àò·å†...,2019-11-14 18:05:54+00:00,https://t.me/DoctorsET/143,2049,0,1,0,0,0.0,0,0,0,0,0,0,,0,0,0,0,0,0,0.0,0.0,0


In [37]:
# Re-check the overall percentage of missing cells after the fills.
percent_missing_values(df)

The dataset contains 0.64 % missing values.


In [38]:
# Rebuild the per-column missing-value summary on the current frame.
missing_df = missing_values_table(df)

Your selected dataframe has 30 columns.
There are 1 columns that have missing values.


In [39]:
# Display the refreshed missing-value summary table.
missing_df

Unnamed: 0,Missing Values,% of Total Values,Dtype
media_type,146,19.31,object


In [40]:
# Replace missing media_type values with 0 (project helper).
# NOTE(review): media_type is an object (text) column; filling it with the
# integer 0 mixes types — a text label may be a better sentinel. Confirm.
fix_missing_value(df, ['media_type'], 0)

146 missing values in the column media_type have been replaced by 0.


In [41]:
# Final check of the overall percentage of missing cells.
percent_missing_values(df)

The dataset contains 0.0 % missing values.


# Data Types

In [42]:
# List the columns whose values are of mixed types (project helper).
# NOTE(review): mixed types here are presumably a side effect of filling text
# columns with the integer 0 — verify.
show_cols_mixed_dtypes(df)

                        Column      Data type
0                      message  mixed-integer
1              cleaned_message  mixed-integer
2   forward_msg_from_peer_type  mixed-integer
3   forward_msg_from_peer_name  mixed-integer
4             forward_msg_date  mixed-integer
5      forward_msg_date_string  mixed-integer
6             forward_msg_link  mixed-integer
7                   media_type  mixed-integer
8                          url  mixed-integer
9                       domain  mixed-integer
10                   url_title  mixed-integer
11             url_description  mixed-integer
12               document_type  mixed-integer
13           document_filename  mixed-integer


In [43]:
# Current dtype of every column, before any type conversion.
df.dtypes

signature                      object
channel_id                      int64
channel_name                   object
msg_id                          int64
message                        object
cleaned_message                object
date                           object
msg_link                       object
views                           int64
number_replies                  int64
number_forwards                 int64
is_forward                      int64
forward_msg_from_peer_type     object
forward_msg_from_peer_id      float64
forward_msg_from_peer_name     object
forward_msg_date               object
forward_msg_date_string        object
forward_msg_link               object
is_reply                        int64
contains_media                  int64
media_type                     object
has_url                         int64
url                            object
domain                         object
url_title                      object
url_description                object
document_typ

In [44]:
# Collect the names of all object-dtype columns and display the list.
string_columns = list(df.select_dtypes(include=['object']).columns)
string_columns

['signature',
 'channel_name',
 'message',
 'cleaned_message',
 'date',
 'msg_link',
 'forward_msg_from_peer_type',
 'forward_msg_from_peer_name',
 'forward_msg_date',
 'forward_msg_date_string',
 'forward_msg_link',
 'media_type',
 'url',
 'domain',
 'url_title',
 'url_description',
 'document_type',
 'document_filename']

In [45]:
# Convert the object-dtype columns listed in string_columns to a string dtype
# (project helper).
# NOTE(review): the return value is not captured, so the helper presumably
# mutates df in place — confirm.
convert_to_string(df, string_columns)

In [46]:
# Spot-check 10 random rows after the string conversion (unseeded sample).
df.sample(10)

Unnamed: 0,signature,channel_id,channel_name,msg_id,message,cleaned_message,date,msg_link,views,number_replies,number_forwards,is_forward,forward_msg_from_peer_type,forward_msg_from_peer_id,forward_msg_from_peer_name,forward_msg_date,forward_msg_date_string,forward_msg_link,is_reply,contains_media,media_type,has_url,url,domain,url_title,url_description,document_type,document_id,document_video_duration,document_filename
208,msg_iteration.210.user.DoctorsET.post.633,1102021238,DoctorsET,633,·ã∂/·à≠ ·ãÆ·àÉ·äï·àµ ·àµ·àà ·äê·à≠·â≠ ·àÖ·àò·àù ·àù·äï ·ã≠·àã·àç  ·ä†·â•·ãõ·äõ·ãç·äï ·åç·ãú ·àÖ·ä≠·àù·äì·ãç ·ä®·â£...,·ã∂/·à≠ ·ãÆ·àÉ·äï·àµ ·àµ·àà ·äê·à≠·â≠ ·àÖ·àò·àù ·àù·äï ·ã≠·àã·àç ·ä†·â•·ãõ·äõ·ãç·äï ·åç·ãú ·àÖ·ä≠·àù·äì·ãç ·ä®·â£·ãµ...,2021-03-19 19:06:15+00:00,https://t.me/DoctorsET/633,16374,0,17,0,0,0.0,0,0,0,0,0,1,MessageMediaWebPage,1,https://youtu.be/hUZtBiDLrQo,youtu.be,Doctors Ethiopia : ·ã®·äê·à≠·â≠ ·âΩ·åç·à≠ ·â†·åç·ãú ·àù·àç·ä≠·â∂·âπ ·ä´·àç·â≥·ãà·âÄ ·àÖ·ä≠...,#DoctorsEthiopia #kestedamenaFoam #FanaTelevision,0,0.0,0.0,0
156,msg_iteration.158.user.DoctorsET.post.688,1102021238,DoctorsET,688,·ä≠·çç·àç 2 ·â∞·àà·âÄ·âÄ! ·àµ·äê ·àù·åç·â• ! ·â†·â•·ãô ·àõ·àÖ·â†·à´·ãä ·ãµ·àÖ·à® ·åà·çÜ·âΩ ·ã® ·àô·ãù ...,·ä≠·çç·àç 2 ·â∞·àà·âÄ·âÄ! ·àµ·äê ·àù·åç·â• ! ·â†·â•·ãô ·àõ·àÖ·â†·à´·ãä ·ãµ·àÖ·à® ·åà·çÜ·âΩ ·ã® ·àô·ãù ·å•·âÖ...,2021-07-09 18:50:23+00:00,https://t.me/DoctorsET/688,13894,0,14,0,0,0.0,0,0,0,0,0,1,MessageMediaWebPage,1,https://youtu.be/4IOZAxRRg8I,youtu.be,·ä≠·çç·àç 2 ·àõ·äï·äõ·ãç·àù ·à∞·ãç ·àä·àò·åà·â†·ãç ·ã®·àö·âΩ·àç·ãç ·ã®·àù·åç·â• ·ä†·ã≠·äê·âµ ·å§·äì·ãé·äï ·ã≠·å†·â•·âÅ...,#DoctorsEthiopia #FanaTelevision #MebaEntertai...,0,0.0,0.0,0
369,msg_iteration.394.user.DoctorsET.post.445,1102021238,DoctorsET,445,·â†·ãö·àÖ ·ä´·à≠·â≥ ·àµ·à≠·å≠·â±·äï ·ã≠·àò·àç·ä®·â± ·ä®·àù·äï·åç·ãú·ãç·àù ·â†·àã·ã≠ ·ä•·äï·å†·äï·âÄ·âÖ !,·â†·ãö·àÖ ·ä´·à≠·â≥ ·àµ·à≠·å≠·â±·äï ·ã≠·àò·àç·ä®·â± ·ä®·àù·äï·åç·ãú·ãç·àù ·â†·àã·ã≠ ·ä•·äï·å†·äï·âÄ·âÖ !,2020-06-07 10:41:00+00:00,https://t.me/DoctorsET/445,2827,0,3,0,0,0.0,0,0,0,0,0,1,MessageMediaPhoto,0,0,0,0,0,0,0.0,0.0,0
262,msg_iteration.267.user.DoctorsET.post.573,1102021238,DoctorsET,573,·ä®·â£·ãµ ·ã®·à´·àµ ·àù·â≥·âµ ·àÖ·àò·àù ·àò·äï·àµ·ä§·ãç·äì ·àò·çç·âµ·àî·ãç YouTube #subsc...,·ä®·â£·ãµ ·ã®·à´·àµ ·àù·â≥·âµ ·àÖ·àò·àù ·àò·äï·àµ·ä§·ãç·äì ·àò·çç·âµ·àî·ãç YouTube #subscrib...,2020-11-13 19:23:16+00:00,https://t.me/DoctorsET/573,13987,0,27,0,0,0.0,0,0,0,0,0,1,MessageMediaWebPage,1,https://youtu.be/PtKeAxXiGgg,youtu.be,Doctors Ethiopia : ·ä®·â£·ãµ ·ã®·à´·àµ ·àù·â≥·âµ ·âΩ·åç·à≠ ·ã®·àö·ã´·àµ·ä®·àµ·â∞·ãç ·ã® ...,#DoctorsEthiopia #FANATV #subscribe ·â† ·â¥·àå·åç·à´·àù ·ã≠·âÄ...,0,0.0,0.0,0
275,msg_iteration.281.user.DoctorsET.post.559,1102021238,DoctorsET,559,·ãµ·äï·åà·âµ ·à∞·ãã·âΩ ·ã≠·ãà·ãµ·âÅ·äì ·ä†·â•·ãõ·äõ·ãç ·àõ·àÖ·â†·à®·à∞·â£·âΩ·äï ·ã®·àö·çà·å•·à®·ãç ·ãµ·äï·åã·å§  ·àÉ·ä™...,·ãµ·äï·åà·âµ ·à∞·ãã·âΩ ·ã≠·ãà·ãµ·âÅ·äì ·ä†·â•·ãõ·äõ·ãç ·àõ·àÖ·â†·à®·à∞·â£·âΩ·äï ·ã®·àö·çà·å•·à®·ãç ·ãµ·äï·åã·å§ ·àÉ·ä™·àû·âΩ...,2020-11-01 05:58:44+00:00,https://t.me/DoctorsET/559,7718,0,8,0,0,0.0,0,0,0,0,0,1,MessageMediaDocument,0,0,0,0,0,video/mp4,5.832637e+18,57.0,0
32,msg_iteration.32.user.DoctorsET.post.820,1102021238,DoctorsET,820,0,0,2022-04-15 18:06:19+00:00,https://t.me/DoctorsET/820,13691,0,5,0,0,0.0,0,0,0,0,0,1,MessageMediaPhoto,0,0,0,0,0,0,0.0,0.0,0
245,msg_iteration.248.user.DoctorsET.post.593,1102021238,DoctorsET,593,·ãç·à≠·åÉ (Abortion ) ·ä® ·ãà·àä·ãµ ·åã·à≠ ·â∞·ã´·ã≠·ãû ·ã®·àö·àò·å£ ·âΩ·åç·à≠ ·â†·ä†·àÅ·äë ...,·ãç·à≠·åÉ (Abortion ) ·ä® ·ãà·àä·ãµ ·åã·à≠ ·â∞·ã´·ã≠·ãû ·ã®·àö·àò·å£ ·âΩ·åç·à≠ ·â†·ä†·àÅ·äë ·à∞·ä†...,2020-12-07 17:20:26+00:00,https://t.me/DoctorsET/593,11214,0,14,0,0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0
295,msg_iteration.304.user.DoctorsET.post.535,1102021238,DoctorsET,535,·ãç·ãµ ·ã® ·ã∂·ä≠·â∞·à≠·àµ ·ä¢·âµ·ãÆ·åµ·ã´ ·â§·â∞·à∞·â¶·âΩ ·àµ·àà ·ãà·àä·ãµ ·àò·âÜ·å£·å†·à™·ã´ ·ã´·àã·âΩ·àÅ·äï ·å•·ã´·âÑ...,·ãç·ãµ ·ã® ·ã∂·ä≠·â∞·à≠·àµ ·ä¢·âµ·ãÆ·åµ·ã´ ·â§·â∞·à∞·â¶·âΩ ·àµ·àà ·ãà·àä·ãµ ·àò·âÜ·å£·å†·à™·ã´ ·ã´·àã·âΩ·àÅ·äï ·å•·ã´·âÑ...,2020-10-22 06:39:37+00:00,https://t.me/DoctorsET/535,5220,0,0,0,0,0.0,0,0,0,0,0,1,MessageMediaWebPage,1,https://t.me/joinchat/AAAAAEGvgnYXOueEE2wSKg,t.me,Doctors Ethiopia,The First medical entertainment Tv show coming...,0,0.0,0.0,0
238,msg_iteration.241.user.DoctorsET.post.601,1102021238,DoctorsET,601,·ã≤·çï·à¨·àΩ·äï ·ä® ·ä´·äï·à∞·à≠ ·ã≠·àç·âÖ ·ä†·àµ·ä®·çä ·â†·àΩ·â≥ ·äê·ãç ·àò·çã·âµ·àÑ·ãç ·â† ·çï·àÆ·çå·à∞·à≠ ·ä†·â≥·àã...,·ã≤·çï·à¨·àΩ·äï ·ä® ·ä´·äï·à∞·à≠ ·ã≠·àç·âÖ ·ä†·àµ·ä®·çä ·â†·àΩ·â≥ ·äê·ãç ·àò·çã·âµ·àÑ·ãç ·â† ·çï·àÆ·çå·à∞·à≠ ·ä†·â≥·àã...,2020-12-26 08:37:25+00:00,https://t.me/DoctorsET/601,11635,0,8,0,0,0.0,0,0,0,0,0,1,MessageMediaWebPage,1,https://youtu.be/izSE5roHiCM,youtu.be,Doctors Ethiopia: ·ä® ·ã≤·çï·à¨·àΩ·äï ·ä•·äï·ã¥·âµ ·äê·åª ·àò·àÜ·äï ·ã≠·âª·àã·àç// ·ä®...,#MebaEntertaimnet #DoctorsEthiopia # FanaTelev...,0,0.0,0.0,0
509,msg_iteration.542.user.DoctorsET.post.282,1102021238,DoctorsET,282,Updated. Source VoA Doctors Ethiopia¬© Meba Ent...,Updated. Source VoA Doctors Ethiopia¬© Meba Ent...,2020-02-02 05:56:02+00:00,https://t.me/DoctorsET/282,2249,0,0,0,0,0.0,0,0,0,0,0,1,MessageMediaPhoto,0,0,0,0,0,0,0.0,0.0,0


In [47]:
# Parse the 'date' column into datetimes (project helper).
# NOTE(review): the return value is not captured, so the helper presumably
# mutates df in place — confirm.
convert_to_datetime(df, ['date'])

In [48]:
# Re-run the mixed-type check after the string and datetime conversions.
show_cols_mixed_dtypes(df)

None of the columns contain mixed types.


In [49]:
# Dtype of every column after the conversions.
df.dtypes

signature                          string[python]
channel_id                                  int64
channel_name                       string[python]
msg_id                                      int64
message                            string[python]
cleaned_message                    string[python]
date                          datetime64[ns, UTC]
msg_link                           string[python]
views                                       int64
number_replies                              int64
number_forwards                             int64
is_forward                                  int64
forward_msg_from_peer_type         string[python]
forward_msg_from_peer_id                  float64
forward_msg_from_peer_name         string[python]
forward_msg_date                   string[python]
forward_msg_date_string            string[python]
forward_msg_link                   string[python]
is_reply                                    int64
contains_media                              int64


# Duplicates

In [50]:
# Search for duplicate rows and drop them (project helper).
# NOTE(review): the return value is discarded — confirm the helper mutates df
# in place, otherwise any rows it drops are lost.
drop_duplicates(df)

No duplicate rows were found.


In [51]:
# Check whether any 'signature' value occurs more than once.
# duplicated() never flags the first occurrence of a value, so reducing the
# mask with .all() is False for every non-empty frame and cannot detect
# duplicates; .any() is True exactly when at least one repeat exists.
df.duplicated(subset=['signature']).any()

False

In [55]:
# Persist the cleaned frame as CSV via the project's FileHandler.
# NOTE(review): the '../data/new/' directory presumably has to exist already
# unless FileHandler.to_csv creates it — confirm.
file_handler.to_csv(df, '../data/new/DoctorsET.csv')