In [1]:
import pandas as pd

# Read the JSONL file into a DataFrame
df = pd.read_json('QaranJobs.so.jsonl', lines=True)

# Convert DataFrame to a list of dictionaries
records = df.to_dict(orient='records')

# Normalize the posts data
df_posts = pd.json_normalize(records, errors='ignore')

# Normalize the comments data, adding prefixes to avoid key conflicts
df_comments = pd.json_normalize(
    records,
    record_path=['comments', 'data'],
    meta=[
        'id',
        'created_time',
        'permalink_url',
        ['shares', 'count'],
        'message'
    ],
    meta_prefix='post_',
    errors='ignore'
)

# Normalize the reactions data from comments, adding prefixes to avoid key conflicts
df_reactions = pd.json_normalize(
    records,
    record_path=['comments', 'data', 'reactions', 'data'],
    meta=[
        'id',
        ['comments', 'data', 'id', 'created_time', 'message']
    ],
    meta_prefix='comment_',
    errors='ignore'
)

# To show the DataFrame for posts
print(df_posts.head())

# To show the DataFrame for comments
print(df_comments.head())

# To show the DataFrame for reactions to comments
print(df_reactions.head())

  return values.astype(dtype, copy=copy)


             id              created_time  \
0  1.905016e+29 2024-03-31 11:40:46+00:00   
1  1.905016e+29 2024-03-31 09:02:41+00:00   
2  1.905016e+29 2024-03-31 08:15:21+00:00   
3  1.905016e+29 2024-03-31 06:36:19+00:00   
4  1.905016e+29 2024-03-30 13:08:50+00:00   

                                       permalink_url  \
0  https://www.facebook.com/100069752805511/posts...   
1  https://www.facebook.com/100069752805511/posts...   
2  https://www.facebook.com/100069752805511/posts...   
3  https://www.facebook.com/100069752805511/posts...   
4  https://www.facebook.com/100069752805511/posts...   

                                        full_picture  \
0  https://scontent.fmgq1-2.fna.fbcdn.net/v/t39.3...   
1  https://scontent.fmgq1-2.fna.fbcdn.net/v/t39.3...   
2  https://scontent.fmgq1-2.fna.fbcdn.net/v/t39.3...   
3  https://scontent.fmgq1-2.fna.fbcdn.net/v/t39.3...   
4  https://scontent.fmgq1-2.fna.fbcdn.net/v/t45.1...   

                                             message  a

In [26]:
total_nan_count = df.isnull().sum().sum()
total_nan_count

7163

In [27]:
# Count NaN values in each column
nan_counts_per_column = df.isna().sum()
nan_counts_per_column

id                  0
created_time        0
permalink_url       0
full_picture        1
shares           2088
reactions           0
message             0
comments            0
attachments      5074
dtype: int64

In [2]:
df.head()

Unnamed: 0,id,created_time,permalink_url,full_picture,shares,reactions,message,comments,attachments
0,1.905016e+29,2024-03-31 11:40:46+00:00,https://www.facebook.com/100069752805511/posts...,https://scontent.fmgq1-2.fna.fbcdn.net/v/t39.3...,{'count': 3},"{'data': [], 'summary': {'total_count': 30}}",Fursado Shaqo:\nTSFP/MCHN/BSFP Registration Cl...,{'data': [{'id': '726961929638894_324291780293...,
1,1.905016e+29,2024-03-31 09:02:41+00:00,https://www.facebook.com/100069752805511/posts...,https://scontent.fmgq1-2.fna.fbcdn.net/v/t39.3...,,"{'data': [], 'summary': {'total_count': 28}}",𝟓 𝐅𝐮𝐫𝐬𝐚𝐝𝐨 𝐒𝐡𝐚𝐪𝐨 𝐀𝐡:\nJamhuuriyadda Federaalka ...,{'data': [{'id': '726894056312348_478021441447...,
2,1.905016e+29,2024-03-31 08:15:21+00:00,https://www.facebook.com/100069752805511/posts...,https://scontent.fmgq1-2.fna.fbcdn.net/v/t39.3...,{'count': 3},"{'data': [], 'summary': {'total_count': 22}}",Fursad Shaqo:\nDhamaan Dadwaynaha Ku Nool Deeg...,{'data': [{'id': '726875066314247_433778902458...,
3,1.905016e+29,2024-03-31 06:36:19+00:00,https://www.facebook.com/100069752805511/posts...,https://scontent.fmgq1-2.fna.fbcdn.net/v/t39.3...,{'count': 1},"{'data': [], 'summary': {'total_count': 17}}",4 Fursado Shaqo Ah:\nSalaam Somali Bank Waxaa ...,"{'data': [], 'summary': {'total_count': 0}}",
4,1.905016e+29,2024-03-30 13:08:50+00:00,https://www.facebook.com/100069752805511/posts...,https://scontent.fmgq1-2.fna.fbcdn.net/v/t45.1...,{'count': 9},"{'data': [], 'summary': {'total_count': 161}}",6 Fursado Shaqo Ah:\nHay'adda Save the Childre...,{'data': [{'id': '726381423030278_114188169057...,


In [29]:
df_posts.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5075 entries, 0 to 5074
Data columns (total 13 columns):
 #   Column                         Non-Null Count  Dtype              
---  ------                         --------------  -----              
 0   id                             5075 non-null   float64            
 1   created_time                   5075 non-null   datetime64[ns, UTC]
 2   permalink_url                  5075 non-null   object             
 3   full_picture                   5074 non-null   object             
 4   message                        5075 non-null   object             
 5   attachments                    0 non-null      float64            
 6   shares.count                   2987 non-null   float64            
 7   reactions.data                 5075 non-null   object             
 8   reactions.summary.total_count  5075 non-null   int64              
 9   comments.data                  5075 non-null   object             
 10  comments.summary.total_c

In [30]:
df_comments.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3581 entries, 0 to 3580
Data columns (total 12 columns):
 #   Column                         Non-Null Count  Dtype              
---  ------                         --------------  -----              
 0   id                             3581 non-null   object             
 1   created_time                   3581 non-null   object             
 2   message                        3581 non-null   object             
 3   comment_count                  3581 non-null   int64              
 4   reactions.data                 3581 non-null   object             
 5   reactions.summary.total_count  3581 non-null   int64              
 6   comments.data                  429 non-null    object             
 7   post_id                        3581 non-null   object             
 8   post_created_time              3581 non-null   datetime64[ns, UTC]
 9   post_permalink_url             3581 non-null   object             
 10  post_shares.count       

In [31]:
df_reactions.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 819 entries, 0 to 818
Data columns (total 5 columns):
 #   Column                                         Non-Null Count  Dtype 
---  ------                                         --------------  ----- 
 0   id                                             819 non-null    object
 1   name                                           819 non-null    object
 2   type                                           819 non-null    object
 3   comment_id                                     819 non-null    object
 4   comment_comments.data.id.created_time.message  0 non-null      object
dtypes: object(5)
memory usage: 32.1+ KB


In [32]:
df_posts.tail()

Unnamed: 0,id,created_time,permalink_url,full_picture,message,attachments,shares.count,reactions.data,reactions.summary.total_count,comments.data,comments.summary.total_count,shares,attachments.data
5070,1.905016e+30,2022-10-30 03:49:07+00:00,https://www.facebook.com/190501611569959/posts/pfbid0YH6UZWAZqd4ervxWfFCeBkBaj9VUxLbaahgko6wsGp8Cp7Ntu158eJmtwu2KF7CCl/,https://external.fmgq1-2.fna.fbcdn.net/emg1/v/t13/1766670028170992400?url=https%3A%2F%2Fqaranjobs.com%2Fwp-content%2Fuploads%2F2020%2F02%2FGREDO.jpg&fb_obo=1&utld=qaranjobs.com&ccb=13-1&stp=dst-emg0_q75&ur=50234c&_nc_sid=64c8fc&oh=06_AX1U5AxtqsBypo7GZ6x50xsw_4-LRkPPSw-VH0pMAjqp7Q&oe=660F2988,"Community Health Worker (4 Positions) – Bardhere, Luuq – Somalia\nhttps://qaranjobs.com/job/community-health-worker-4-positions/",,3.0,[],2,[],0,,
5071,1.905016e+30,2022-10-30 03:48:18+00:00,https://www.facebook.com/190501611569959/posts/pfbid0NshYqP25ZQDumabzK4dRptuMiPFZJBMzKs2t9v3NEz2ofyo4baW1aF4GLvq73VVhl/,https://external.fmgq1-2.fna.fbcdn.net/emg1/v/t13/1766670028170992400?url=https%3A%2F%2Fqaranjobs.com%2Fwp-content%2Fuploads%2F2020%2F02%2FGREDO.jpg&fb_obo=1&utld=qaranjobs.com&ccb=13-1&stp=dst-emg0_q75&ur=50234c&_nc_sid=64c8fc&oh=06_AX1U5AxtqsBypo7GZ6x50xsw_4-LRkPPSw-VH0pMAjqp7Q&oe=660F2988,"Dispenser/Storekeeper (4 Positions) – Bardhere , Luuq – Somalia\nhttps://qaranjobs.com/job/dispenser-storekeeper-4-positions/",,3.0,[],0,[],0,,
5072,1.905016e+30,2022-10-30 03:47:15+00:00,https://www.facebook.com/190501611569959/posts/pfbid05wnuURqWRw1gXWjk6hvgzhM53WmvzUvqDNAxst82Uqv5awtPnuBZhsGbvoVgFLTZl/,https://external.fmgq1-2.fna.fbcdn.net/emg1/v/t13/1766670028170992400?url=https%3A%2F%2Fqaranjobs.com%2Fwp-content%2Fuploads%2F2020%2F02%2FGREDO.jpg&fb_obo=1&utld=qaranjobs.com&ccb=13-1&stp=dst-emg0_q75&ur=50234c&_nc_sid=64c8fc&oh=06_AX1U5AxtqsBypo7GZ6x50xsw_4-LRkPPSw-VH0pMAjqp7Q&oe=660F2988,"Qualified Midwife (6 positions) – Bardhere and Luuq, Somalia\nhttps://qaranjobs.com/job/qualified-midwife-6-positions/",,3.0,[],0,[],0,,
5073,1.905016e+30,2022-10-30 03:46:28+00:00,https://www.facebook.com/190501611569959/posts/pfbid0d7z18mLubvEiyscJEXuzRUqX464AhyUA3ndPgheaovtSiGYFPEP3Bk71M1pSsKYLl/,https://external.fmgq1-2.fna.fbcdn.net/emg1/v/t13/1766670028170992400?url=https%3A%2F%2Fqaranjobs.com%2Fwp-content%2Fuploads%2F2020%2F02%2FGREDO.jpg&fb_obo=1&utld=qaranjobs.com&ccb=13-1&stp=dst-emg0_q75&ur=50234c&_nc_sid=64c8fc&oh=06_AX1U5AxtqsBypo7GZ6x50xsw_4-LRkPPSw-VH0pMAjqp7Q&oe=660F2988,"Community Mobilizer (4 Positions) – Bardhere and Luuq, Somalia\nhttps://qaranjobs.com/job/community-mobilizer-4-positions/",,4.0,[],2,[],0,,
5074,1.905016e+30,2022-10-30 03:45:28+00:00,https://www.facebook.com/190501611569959/posts/pfbid027sqhKuf1fRRtBdaKqVc91xYGNE53dPPkB9GTMnQwcLLe9UHsnQFALwhBv3YM4TU3l/,https://external.fmgq1-2.fna.fbcdn.net/emg1/v/t13/1766670028170992400?url=https%3A%2F%2Fqaranjobs.com%2Fwp-content%2Fuploads%2F2020%2F02%2FGREDO.jpg&fb_obo=1&utld=qaranjobs.com&ccb=13-1&stp=dst-emg0_q75&ur=50234c&_nc_sid=64c8fc&oh=06_AX1U5AxtqsBypo7GZ6x50xsw_4-LRkPPSw-VH0pMAjqp7Q&oe=660F2988,"Nutrition Supervisor (2 Position) – Bardhere and Luuq, Somalia\nhttps://qaranjobs.com/job/nutrition-supervisor-2-position/",,3.0,[],0,[],0,,


In [33]:
df_posts.head()

Unnamed: 0,id,created_time,permalink_url,full_picture,message,attachments,shares.count,reactions.data,reactions.summary.total_count,comments.data,comments.summary.total_count,shares,attachments.data
0,1.905016e+29,2024-03-31 11:40:46+00:00,https://www.facebook.com/100069752805511/posts/pfbid029tNGn9xYEdpBps45PhzuPzSmecGLoRd9yAPejysbVoDmRR7F8f2Gfxf38VU8DnUrl,https://scontent.fmgq1-2.fna.fbcdn.net/v/t39.30808-6/434080331_726961826305571_8310852036318558766_n.jpg?_nc_cat=110&ccb=1-7&_nc_sid=5f2048&_nc_ohc=1tl241klzLYAX__666E&_nc_ht=scontent.fmgq1-2.fna&oh=00_AfCM8tnnOdS6Jlbu3vOx0orTG08J84UzL6bK1qoU747-YA&oe=66133975,"Fursado Shaqo:\nTSFP/MCHN/BSFP Registration Clerk (26), Baidoa, Burhakaba, Berdale & Qansax Dheere, Somalia\nhttps://qaranjobs.com/job/tsfp-mchn-bsfp-registration-clerk-26-baidoa-burhakaba-berdale-qansax-dheere/ \n #qaranjobs #sos #FursadShaqo #somalijobs",,3.0,[],30,"[{'id': '726961929638894_324291780293386', 'created_time': '2024-03-31T11:45:28+0000', 'reactions': {'data': [], 'summary': {'total_count': 0}}, 'message': 'Edinka waxa aad soo dhegtan meel laka codsado mawalahan Ee maxay tahay', 'comment_count': 0}, {'id': '726961929638894_385598347646729', 'created_time': '2024-03-31T11:56:24+0000', 'reactions': {'data': [], 'summary': {'total_count': 0}}, 'message': 'Se wax yeshen hr', 'comment_count': 0}, {'id': '726961929638894_434427659047806', 'created_time': '2024-03-31T11:56:27+0000', 'reactions': {'data': [], 'summary': {'total_count': 0}}, 'message': 'Hee', 'comment_count': 0}, {'id': '726961929638894_1129552994749831', 'created_time': '2024-03-31T12:03:27+0000', 'reactions': {'data': [], 'summary': {'total_count': 0}}, 'message': 'Mel laga applied malahan mexethy', 'comment_count': 0}, {'id': '726961929638894_616721103985469', 'created_time': '2024-03-31T12:26:15+0000', 'reactions': {'data': [], 'summary': {'total_count': 0}}, 'message': 'Email ka mesha ku qoran ku gudbi codsigaaga hadii aa goobta laka shaqa galaayo u dhowtahay benalayaal waaye wax ay online ku qaadayana malahaa', 'comment_count': 0}]",5,,
1,1.905016e+29,2024-03-31 09:02:41+00:00,https://www.facebook.com/100069752805511/posts/pfbid0RmfXb5oTPfA8LatCYi3PPKHvEHZzxKpoE86VpaHpdQsLRkjgfnQzzSdGjGmhaaMUl,https://scontent.fmgq1-2.fna.fbcdn.net/v/t39.30808-6/433886106_726893962979024_3122369860527213223_n.jpg?_nc_cat=104&ccb=1-7&_nc_sid=5f2048&_nc_ohc=REyoC3f_PH4AX-p6gR_&_nc_ht=scontent.fmgq1-2.fna&oh=00_AfB376Cvxz4GDOFB467eBKcSbvUpmAVWBoGeEgoiEkHRXA&oe=661316D5,"𝟓 𝐅𝐮𝐫𝐬𝐚𝐝𝐨 𝐒𝐡𝐚𝐪𝐨 𝐀𝐡:\nJamhuuriyadda Federaalka Soomaaliya Waxaa Ka Banaan 5 Shaqo, Fadlan Link-ga hoose ka codso: 👇👇\n🔗𝑳𝒊𝒏𝒌: https://qaranjobs.com/job/somali-jobs-the-federal-republic-of-somalia-5-positions-are-closing-today-march-31st-2024/\nF.G: Fursadahan Shaqo Maanta Ayaa U Danbeysa Oo Ay Xirmayaan (31 Mar 2024). \n #qaranjobs #Somalia",,,[],28,"[{'id': '726894056312348_478021441447570', 'created_time': '2024-03-31T09:34:15+0000', 'reactions': {'data': [], 'summary': {'total_count': 0}}, 'message': 'waad soo dahiseyn sxbyaal', 'comment_count': 0}]",1,,
2,1.905016e+29,2024-03-31 08:15:21+00:00,https://www.facebook.com/100069752805511/posts/pfbid02VFNiABu2fAYjDFsyQk9pg627tyx6iC3Sns1A9gBSM5mzkueUB8iakZVahBURCqUNl,https://scontent.fmgq1-2.fna.fbcdn.net/v/t39.30808-6/433933765_726874939647593_6225378752850722358_n.jpg?_nc_cat=100&ccb=1-7&_nc_sid=5f2048&_nc_ohc=1qHLwpQYnC8AX8lhnvy&_nc_ht=scontent.fmgq1-2.fna&oh=00_AfB-tEBwnl0v0bi20Ph5Xf0SDqfiUBLjqjwWNVj8yIARBA&oe=661323A6,"Fursad Shaqo:\nDhamaan Dadwaynaha Ku Nool Deegaanada Koonfur Galbeed Soomaaliya Gaar Ahaan Magaalada Baydhabo Waxaa La' Ogeysiineynaa in ay Fursad Shaqo Ka Banantahay Mashruuca Somalia Urban Resilience Project (SURP II), oo ay fulinayso Dowlaha Hoose ee Baydhabo. Fadlan Link-ga hoose ka codso: 👇👇\nhttps://qaranjobs.com/job/financial-management-specialist-baidoa-somalia/ \n #qaranjobs #Somalijobs #somalijobs",,3.0,[],22,"[{'id': '726875066314247_433778902458181', 'created_time': '2024-03-31T08:30:50+0000', 'reactions': {'data': [], 'summary': {'total_count': 0}}, 'message': 'Ali Hassan Salat', 'comment_count': 1, 'comments': {'data': [{'created_time': '2024-04-02T04:49:01+0000', 'from': {'name': 'Ali Hassan Salat', 'id': '100039194043550'}, 'message': 'Mohamed Ahmed Hassan thanks wllka', 'id': '726875066314247_714105697589582'}]}}, {'id': '726875066314247_1877682789335640', 'created_time': '2024-03-31T10:34:23+0000', 'reactions': {'data': [{'id': '100017032722294', 'name': 'Abdirahman Sheikh Abdisalan', 'type': 'LIKE'}], 'summary': {'total_count': 1}}, 'message': 'Abdirahman Sheikh Abdisalan', 'comment_count': 0}, {'id': '726875066314247_953345846519658', 'created_time': '2024-04-01T15:07:18+0000', 'reactions': {'data': [], 'summary': {'total_count': 0}}, 'message': 'Abdi Mohamed Abdalle adigy kuu baahnyihiin dadkaan', 'comment_count': 0}]",3,,
3,1.905016e+29,2024-03-31 06:36:19+00:00,https://www.facebook.com/100069752805511/posts/pfbid0basJmPUp46cRs1Cd3urQxKit1RXQM7kpPbxLHVUUUmmjCemtKK4yycWW9ozJvX2Gl,https://scontent.fmgq1-2.fna.fbcdn.net/v/t39.30808-6/434431744_726836226318131_8935310608416442472_n.jpg?_nc_cat=106&ccb=1-7&_nc_sid=5f2048&_nc_ohc=dXrs6KPFjX8AX_OuZ2o&_nc_ht=scontent.fmgq1-2.fna&oh=00_AfDdRAD-udC7QITleGB7j3uCnHh6aUEmiJwzi2y2utuD6Q&oe=66132471,"4 Fursado Shaqo Ah:\nSalaam Somali Bank Waxaa Ka Banaan 4 Shaqo, Fadlan Link-ga hoose ka codso: 👇👇\nhttps://qaranjobs.com/job/fursado-shaqo-oo-ka-banaan-salaam-somali-bank/ \nDeadline: 31 March 2024.\n #qaranjobs",,1.0,[],17,[],0,,
4,1.905016e+29,2024-03-30 13:08:50+00:00,https://www.facebook.com/100069752805511/posts/pfbid0d6fzYLHcTyBAZKrJLT5skjWv7yPeGi2XYNeDqQogesHDWqeih91MVE6HUaQyXEiel,https://scontent.fmgq1-2.fna.fbcdn.net/v/t45.1600-4/433679821_120208841026560759_4401921263084898502_n.jpg?_nc_cat=103&ccb=1-7&_nc_ohc=65sYSxm1OKwAX8v6zgK&_nc_ht=scontent.fmgq1-2.fna&stp=dst-emg0_q75&ur=5f2048&_nc_sid=64c8fc&oh=00_AfBh_wpI1lyC7fTeaI8OU6-zfc_0btJi1JcwLp5Hi0prbw&oe=66131A63,"6 Fursado Shaqo Ah:\nHay'adda Save the Children Waxaa Ka Banaan 6 Shaqo, Fadlan Link-ga hoose ka codso: 👇👇\nhttps://qaranjobs.com/job/somali-jobs-at-save-the-children-6-positions/ \n #qaranjobs",,9.0,[],161,"[{'id': '726381423030278_1141881690573753', 'created_time': '2024-03-30T15:12:19+0000', 'reactions': {'data': [], 'summary': {'total_count': 0}}, 'message': 'Siciid Ck Allaale', 'comment_count': 0}, {'id': '726381423030278_381699048107054', 'created_time': '2024-03-30T15:30:23+0000', 'reactions': {'data': [{'id': '100013337285994', 'name': ""Mu'ad Xaange"", 'type': 'LOVE'}], 'summary': {'total_count': 1}}, 'message': 'Mu'ad Xaange', 'comment_count': 0}, {'id': '726381423030278_302977372812875', 'created_time': '2024-03-30T15:46:14+0000', 'reactions': {'data': [], 'summary': {'total_count': 0}}, 'message': 'Issack Ali hassan', 'comment_count': 0}, {'id': '726381423030278_2761204940694365', 'created_time': '2024-03-30T15:50:15+0000', 'reactions': {'data': [], 'summary': {'total_count': 0}}, 'message': 'bahja apd naasir muuse', 'comment_count': 0}, {'id': '726381423030278_1194552491991562', 'created_time': '2024-03-30T19:21:24+0000', 'reactions': {'data': [], 'summary': {'total_count': 0}}, 'message': 'Ibarahim Aljacfari', 'comment_count': 0}, {'id': '726381423030278_726186839691412', 'created_time': '2024-03-30T20:39:22+0000', 'reactions': {'data': [], 'summary': {'total_count': 0}}, 'message': 'Saluugla Hassan Biixi apply dhhe', 'comment_count': 0}, {'id': '726381423030278_444388684615445', 'created_time': '2024-03-30T20:49:47+0000', 'reactions': {'data': [], 'summary': {'total_count': 0}}, 'message': 'Wr somalia wxaan ahayn masoo gudbisan', 'comment_count': 0}, {'id': '726381423030278_1153199375693759', 'created_time': '2024-03-30T21:42:15+0000', 'reactions': {'data': [], 'summary': {'total_count': 0}}, 'message': 'Said Ciyoon boos adiga ah baa ku jira taliye', 'comment_count': 0}, {'id': '726381423030278_1356799048517717', 'created_time': '2024-03-30T21:46:54+0000', 'reactions': {'data': [], 'summary': {'total_count': 0}}, 'message': 'Mohamud Ciiro', 'comment_count': 0}, {'id': '726381423030278_1075330583542952', 'created_time': '2024-03-31T00:25:53+0000', 'reactions': {'data': [], 'summary': {'total_count': 0}}, 'message': 'Puntlnd maxaa ka hapary meshan😩', 'comment_count': 0}, {'id': '726381423030278_809485873882686', 'created_time': '2024-03-31T04:39:47+0000', 'reactions': {'data': [], 'summary': {'total_count': 0}}, 'message': 'Bile Shicinbir waxan waa shaqadadii', 'comment_count': 0}, {'id': '726381423030278_2163154464027200', 'created_time': '2024-03-31T06:53:58+0000', 'reactions': {'data': [], 'summary': {'total_count': 0}}, 'message': 'Beenta iska dhaafa waxaad si sharci dara ah u dhimeysaan shaqaalihii in mudo ah idiin shaqeynaayey', 'comment_count': 0}, {'id': '726381423030278_1458597454757353', 'created_time': '2024-03-31T11:02:14+0000', 'reactions': {'data': [], 'summary': {'total_count': 0}}, 'message': 'Dahir Shariif', 'comment_count': 0}, {'id': '726381423030278_1381398322539378', 'created_time': '2024-04-02T11:23:44+0000', 'reactions': {'data': [], 'summary': {'total_count': 0}}, 'message': 'Ibraahim Haajir muqdisho aya ku jirto', 'comment_count': 0}, {'id': '726381423030278_933785488238914', 'created_time': '2024-04-03T10:44:54+0000', 'reactions': {'data': [], 'summary': {'total_count': 0}}, 'message': 'Hamda A. Adam', 'comment_count': 0}, {'id': '726381423030278_1434383210499507', 'created_time': '2024-04-03T10:45:10+0000', 'reactions': {'data': [], 'summary': {'total_count': 0}}, 'message': 'Sa'da Abdullah', 'comment_count': 0}]",16,,


In [34]:
df_comments.head()

Unnamed: 0,id,created_time,message,comment_count,reactions.data,reactions.summary.total_count,comments.data,post_id,post_created_time,post_permalink_url,post_shares.count,post_message
0,726961929638894_324291780293386,2024-03-31T11:45:28+0000,Edinka waxa aad soo dhegtan meel laka codsado mawalahan Ee maxay tahay,0,[],0,,1.905016115699597e+29,2024-03-31 11:40:46+00:00,https://www.facebook.com/100069752805511/posts/pfbid029tNGn9xYEdpBps45PhzuPzSmecGLoRd9yAPejysbVoDmRR7F8f2Gfxf38VU8DnUrl,,"Fursado Shaqo:\nTSFP/MCHN/BSFP Registration Clerk (26), Baidoa, Burhakaba, Berdale & Qansax Dheere, Somalia\nhttps://qaranjobs.com/job/tsfp-mchn-bsfp-registration-clerk-26-baidoa-burhakaba-berdale-qansax-dheere/ \n #qaranjobs #sos #FursadShaqo #somalijobs"
1,726961929638894_385598347646729,2024-03-31T11:56:24+0000,Se wax yeshen hr,0,[],0,,1.905016115699597e+29,2024-03-31 11:40:46+00:00,https://www.facebook.com/100069752805511/posts/pfbid029tNGn9xYEdpBps45PhzuPzSmecGLoRd9yAPejysbVoDmRR7F8f2Gfxf38VU8DnUrl,,"Fursado Shaqo:\nTSFP/MCHN/BSFP Registration Clerk (26), Baidoa, Burhakaba, Berdale & Qansax Dheere, Somalia\nhttps://qaranjobs.com/job/tsfp-mchn-bsfp-registration-clerk-26-baidoa-burhakaba-berdale-qansax-dheere/ \n #qaranjobs #sos #FursadShaqo #somalijobs"
2,726961929638894_434427659047806,2024-03-31T11:56:27+0000,Hee,0,[],0,,1.905016115699597e+29,2024-03-31 11:40:46+00:00,https://www.facebook.com/100069752805511/posts/pfbid029tNGn9xYEdpBps45PhzuPzSmecGLoRd9yAPejysbVoDmRR7F8f2Gfxf38VU8DnUrl,,"Fursado Shaqo:\nTSFP/MCHN/BSFP Registration Clerk (26), Baidoa, Burhakaba, Berdale & Qansax Dheere, Somalia\nhttps://qaranjobs.com/job/tsfp-mchn-bsfp-registration-clerk-26-baidoa-burhakaba-berdale-qansax-dheere/ \n #qaranjobs #sos #FursadShaqo #somalijobs"
3,726961929638894_1129552994749831,2024-03-31T12:03:27+0000,Mel laga applied malahan mexethy,0,[],0,,1.905016115699597e+29,2024-03-31 11:40:46+00:00,https://www.facebook.com/100069752805511/posts/pfbid029tNGn9xYEdpBps45PhzuPzSmecGLoRd9yAPejysbVoDmRR7F8f2Gfxf38VU8DnUrl,,"Fursado Shaqo:\nTSFP/MCHN/BSFP Registration Clerk (26), Baidoa, Burhakaba, Berdale & Qansax Dheere, Somalia\nhttps://qaranjobs.com/job/tsfp-mchn-bsfp-registration-clerk-26-baidoa-burhakaba-berdale-qansax-dheere/ \n #qaranjobs #sos #FursadShaqo #somalijobs"
4,726961929638894_616721103985469,2024-03-31T12:26:15+0000,Email ka mesha ku qoran ku gudbi codsigaaga hadii aa goobta laka shaqa galaayo u dhowtahay benalayaal waaye wax ay online ku qaadayana malahaa,0,[],0,,1.905016115699597e+29,2024-03-31 11:40:46+00:00,https://www.facebook.com/100069752805511/posts/pfbid029tNGn9xYEdpBps45PhzuPzSmecGLoRd9yAPejysbVoDmRR7F8f2Gfxf38VU8DnUrl,,"Fursado Shaqo:\nTSFP/MCHN/BSFP Registration Clerk (26), Baidoa, Burhakaba, Berdale & Qansax Dheere, Somalia\nhttps://qaranjobs.com/job/tsfp-mchn-bsfp-registration-clerk-26-baidoa-burhakaba-berdale-qansax-dheere/ \n #qaranjobs #sos #FursadShaqo #somalijobs"


In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5075 entries, 0 to 5074
Data columns (total 9 columns):
 #   Column         Non-Null Count  Dtype              
---  ------         --------------  -----              
 0   id             5075 non-null   float64            
 1   created_time   5075 non-null   datetime64[ns, UTC]
 2   permalink_url  5075 non-null   object             
 3   full_picture   5074 non-null   object             
 4   shares         2987 non-null   object             
 5   reactions      5075 non-null   object             
 6   message        5075 non-null   object             
 7   comments       5075 non-null   object             
 8   attachments    1 non-null      object             
dtypes: datetime64[ns, UTC](1), float64(1), object(7)
memory usage: 357.0+ KB


In [35]:
df_reactions.head()

Unnamed: 0,id,name,type,comment_id,comment_comments.data.id.created_time.message
0,100017032722294,Abdirahman Sheikh Abdisalan,LIKE,1.905016115699597e+29,
1,100013337285994,Mu'ad Xaange,LOVE,1.905016115699597e+29,
2,100031854677787,Salfudin Ina Abdikarim,LIKE,1.905016115699597e+29,
3,100023282056335,Fowzia Ali Qoryarey,LIKE,1.905016115699597e+29,
4,100007317193398,Mohamed Badri,LIKE,1.905016115699597e+29,


In [36]:
df_reactions.duplicated()

0      False
1      False
2      False
3      False
4      False
       ...  
814    False
815    False
816    False
817    False
818    False
Length: 819, dtype: bool

In [37]:
duplicate_rows = df_reactions[df_reactions.duplicated(['name'])]
duplicate_rows


Unnamed: 0,id,name,type,comment_id,comment_comments.data.id.created_time.message
40,100010911224200,Mawa Moalin Ali,LIKE,190501611569959707823801630720.0,
52,100012919353250,Dalmar Abdulahi Shurie,LOVE,190501611569959672639429541888.0,
58,100012919353250,Dalmar Abdulahi Shurie,LOVE,190501611569959672639429541888.0,
59,100012919353250,Dalmar Abdulahi Shurie,LOVE,190501611569959672639429541888.0,
85,100009508839913,Sharif Abdikarim Hassan,LIKE,190501611569959672639429541888.0,
...,...,...,...,...,...
773,100013705968495,Cabdi Xamiid Macalin Yuusuf,LIKE,1905016115699591096894761205760.0,
775,100003696061160,Eng-Mohammed Hussain Hassan,LOVE,1905016115699591096894761205760.0,
780,100003892754242,Samir Saed,LIKE,1905016115699591096894761205760.0,
786,100015199756361,Sacdiya Jamac,LIKE,1905016115699591096894761205760.0,


1. Checking for Duplicates
You should check for and remove any duplicate records in each dataframe. Duplicates can skew your analysis by giving undue weight to repeated entries.

In [38]:
# Example: Converting list columns to strings before removing duplicates
df_posts = df_posts.applymap(lambda x: str(x) if isinstance(x, list) else x)
df_posts = df_posts.drop_duplicates()

df_comments = df_comments.applymap(lambda x: str(x) if isinstance(x, list) else x)
df_comments = df_comments.drop_duplicates()

df_reactions = df_reactions.applymap(lambda x: str(x) if isinstance(x, list) else x)
df_reactions = df_reactions.drop_duplicates()


2. Handling Missing Values
Determine how to handle missing values based on the context of your data. For example, if a column like permalink_url in df_posts has missing values, it might be critical to address this since the URL is a key piece of data.

3. Ensuring Consistent Formatting
Make sure that data types are consistent and appropriate for each column, particularly for dates, identifiers, and numerical fields.

In [39]:
# Convert datetime columns to appropriate format if not already
df_posts['created_time'] = pd.to_datetime(df_posts['created_time'])
df_comments['created_time'] = pd.to_datetime(df_comments['created_time'])
#df_reactions['comment_created_time'] = pd.to_datetime(df_reactions['comment_created_time'])  # Assuming this is the correct column name

# Ensure identifiers like post IDs are strings to prevent numeric precision issues
df_posts['id'] = df_posts['id'].astype(str)
df_comments['post_id'] = df_comments['post_id'].astype(str)
df_reactions['comment_id'] = df_reactions['comment_id'].astype(str)


4. Verifying the Structure and Content
After cleaning, it's a good practice to check the structure and content of each dataframe to ensure they are logically consistent and aligned with your analytical goals.

In [40]:
# Display the info and first few rows to verify the structure and cleanliness
df_posts.head()


Unnamed: 0,id,created_time,permalink_url,full_picture,message,attachments,shares.count,reactions.data,reactions.summary.total_count,comments.data,comments.summary.total_count,shares,attachments.data
0,1.905016115699597e+29,2024-03-31 11:40:46+00:00,https://www.facebook.com/100069752805511/posts/pfbid029tNGn9xYEdpBps45PhzuPzSmecGLoRd9yAPejysbVoDmRR7F8f2Gfxf38VU8DnUrl,https://scontent.fmgq1-2.fna.fbcdn.net/v/t39.30808-6/434080331_726961826305571_8310852036318558766_n.jpg?_nc_cat=110&ccb=1-7&_nc_sid=5f2048&_nc_ohc=1tl241klzLYAX__666E&_nc_ht=scontent.fmgq1-2.fna&oh=00_AfCM8tnnOdS6Jlbu3vOx0orTG08J84UzL6bK1qoU747-YA&oe=66133975,"Fursado Shaqo:\nTSFP/MCHN/BSFP Registration Clerk (26), Baidoa, Burhakaba, Berdale & Qansax Dheere, Somalia\nhttps://qaranjobs.com/job/tsfp-mchn-bsfp-registration-clerk-26-baidoa-burhakaba-berdale-qansax-dheere/ \n #qaranjobs #sos #FursadShaqo #somalijobs",,3.0,[],30,"[{'id': '726961929638894_324291780293386', 'created_time': '2024-03-31T11:45:28+0000', 'reactions': {'data': [], 'summary': {'total_count': 0}}, 'message': 'Edinka waxa aad soo dhegtan meel laka codsado mawalahan Ee maxay tahay', 'comment_count': 0}, {'id': '726961929638894_385598347646729', 'created_time': '2024-03-31T11:56:24+0000', 'reactions': {'data': [], 'summary': {'total_count': 0}}, 'message': 'Se wax yeshen hr', 'comment_count': 0}, {'id': '726961929638894_434427659047806', 'created_time': '2024-03-31T11:56:27+0000', 'reactions': {'data': [], 'summary': {'total_count': 0}}, 'message': 'Hee', 'comment_count': 0}, {'id': '726961929638894_1129552994749831', 'created_time': '2024-03-31T12:03:27+0000', 'reactions': {'data': [], 'summary': {'total_count': 0}}, 'message': 'Mel laga applied malahan mexethy', 'comment_count': 0}, {'id': '726961929638894_616721103985469', 'created_time': '2024-03-31T12:26:15+0000', 'reactions': {'data': [], 'summary': {'total_count': 0}}, 'message': 'Email ka mesha ku qoran ku gudbi codsigaaga hadii aa goobta laka shaqa galaayo u dhowtahay benalayaal waaye wax ay online ku qaadayana malahaa', 'comment_count': 0}]",5,,
1,1.905016115699597e+29,2024-03-31 09:02:41+00:00,https://www.facebook.com/100069752805511/posts/pfbid0RmfXb5oTPfA8LatCYi3PPKHvEHZzxKpoE86VpaHpdQsLRkjgfnQzzSdGjGmhaaMUl,https://scontent.fmgq1-2.fna.fbcdn.net/v/t39.30808-6/433886106_726893962979024_3122369860527213223_n.jpg?_nc_cat=104&ccb=1-7&_nc_sid=5f2048&_nc_ohc=REyoC3f_PH4AX-p6gR_&_nc_ht=scontent.fmgq1-2.fna&oh=00_AfB376Cvxz4GDOFB467eBKcSbvUpmAVWBoGeEgoiEkHRXA&oe=661316D5,"𝟓 𝐅𝐮𝐫𝐬𝐚𝐝𝐨 𝐒𝐡𝐚𝐪𝐨 𝐀𝐡:\nJamhuuriyadda Federaalka Soomaaliya Waxaa Ka Banaan 5 Shaqo, Fadlan Link-ga hoose ka codso: 👇👇\n🔗𝑳𝒊𝒏𝒌: https://qaranjobs.com/job/somali-jobs-the-federal-republic-of-somalia-5-positions-are-closing-today-march-31st-2024/\nF.G: Fursadahan Shaqo Maanta Ayaa U Danbeysa Oo Ay Xirmayaan (31 Mar 2024). \n #qaranjobs #Somalia",,,[],28,"[{'id': '726894056312348_478021441447570', 'created_time': '2024-03-31T09:34:15+0000', 'reactions': {'data': [], 'summary': {'total_count': 0}}, 'message': 'waad soo dahiseyn sxbyaal', 'comment_count': 0}]",1,,
2,1.905016115699597e+29,2024-03-31 08:15:21+00:00,https://www.facebook.com/100069752805511/posts/pfbid02VFNiABu2fAYjDFsyQk9pg627tyx6iC3Sns1A9gBSM5mzkueUB8iakZVahBURCqUNl,https://scontent.fmgq1-2.fna.fbcdn.net/v/t39.30808-6/433933765_726874939647593_6225378752850722358_n.jpg?_nc_cat=100&ccb=1-7&_nc_sid=5f2048&_nc_ohc=1qHLwpQYnC8AX8lhnvy&_nc_ht=scontent.fmgq1-2.fna&oh=00_AfB-tEBwnl0v0bi20Ph5Xf0SDqfiUBLjqjwWNVj8yIARBA&oe=661323A6,"Fursad Shaqo:\nDhamaan Dadwaynaha Ku Nool Deegaanada Koonfur Galbeed Soomaaliya Gaar Ahaan Magaalada Baydhabo Waxaa La' Ogeysiineynaa in ay Fursad Shaqo Ka Banantahay Mashruuca Somalia Urban Resilience Project (SURP II), oo ay fulinayso Dowlaha Hoose ee Baydhabo. Fadlan Link-ga hoose ka codso: 👇👇\nhttps://qaranjobs.com/job/financial-management-specialist-baidoa-somalia/ \n #qaranjobs #Somalijobs #somalijobs",,3.0,[],22,"[{'id': '726875066314247_433778902458181', 'created_time': '2024-03-31T08:30:50+0000', 'reactions': {'data': [], 'summary': {'total_count': 0}}, 'message': 'Ali Hassan Salat', 'comment_count': 1, 'comments': {'data': [{'created_time': '2024-04-02T04:49:01+0000', 'from': {'name': 'Ali Hassan Salat', 'id': '100039194043550'}, 'message': 'Mohamed Ahmed Hassan thanks wllka', 'id': '726875066314247_714105697589582'}]}}, {'id': '726875066314247_1877682789335640', 'created_time': '2024-03-31T10:34:23+0000', 'reactions': {'data': [{'id': '100017032722294', 'name': 'Abdirahman Sheikh Abdisalan', 'type': 'LIKE'}], 'summary': {'total_count': 1}}, 'message': 'Abdirahman Sheikh Abdisalan', 'comment_count': 0}, {'id': '726875066314247_953345846519658', 'created_time': '2024-04-01T15:07:18+0000', 'reactions': {'data': [], 'summary': {'total_count': 0}}, 'message': 'Abdi Mohamed Abdalle adigy kuu baahnyihiin dadkaan', 'comment_count': 0}]",3,,
3,1.905016115699597e+29,2024-03-31 06:36:19+00:00,https://www.facebook.com/100069752805511/posts/pfbid0basJmPUp46cRs1Cd3urQxKit1RXQM7kpPbxLHVUUUmmjCemtKK4yycWW9ozJvX2Gl,https://scontent.fmgq1-2.fna.fbcdn.net/v/t39.30808-6/434431744_726836226318131_8935310608416442472_n.jpg?_nc_cat=106&ccb=1-7&_nc_sid=5f2048&_nc_ohc=dXrs6KPFjX8AX_OuZ2o&_nc_ht=scontent.fmgq1-2.fna&oh=00_AfDdRAD-udC7QITleGB7j3uCnHh6aUEmiJwzi2y2utuD6Q&oe=66132471,"4 Fursado Shaqo Ah:\nSalaam Somali Bank Waxaa Ka Banaan 4 Shaqo, Fadlan Link-ga hoose ka codso: 👇👇\nhttps://qaranjobs.com/job/fursado-shaqo-oo-ka-banaan-salaam-somali-bank/ \nDeadline: 31 March 2024.\n #qaranjobs",,1.0,[],17,[],0,,
4,1.905016115699597e+29,2024-03-30 13:08:50+00:00,https://www.facebook.com/100069752805511/posts/pfbid0d6fzYLHcTyBAZKrJLT5skjWv7yPeGi2XYNeDqQogesHDWqeih91MVE6HUaQyXEiel,https://scontent.fmgq1-2.fna.fbcdn.net/v/t45.1600-4/433679821_120208841026560759_4401921263084898502_n.jpg?_nc_cat=103&ccb=1-7&_nc_ohc=65sYSxm1OKwAX8v6zgK&_nc_ht=scontent.fmgq1-2.fna&stp=dst-emg0_q75&ur=5f2048&_nc_sid=64c8fc&oh=00_AfBh_wpI1lyC7fTeaI8OU6-zfc_0btJi1JcwLp5Hi0prbw&oe=66131A63,"6 Fursado Shaqo Ah:\nHay'adda Save the Children Waxaa Ka Banaan 6 Shaqo, Fadlan Link-ga hoose ka codso: 👇👇\nhttps://qaranjobs.com/job/somali-jobs-at-save-the-children-6-positions/ \n #qaranjobs",,9.0,[],161,"[{'id': '726381423030278_1141881690573753', 'created_time': '2024-03-30T15:12:19+0000', 'reactions': {'data': [], 'summary': {'total_count': 0}}, 'message': 'Siciid Ck Allaale', 'comment_count': 0}, {'id': '726381423030278_381699048107054', 'created_time': '2024-03-30T15:30:23+0000', 'reactions': {'data': [{'id': '100013337285994', 'name': ""Mu'ad Xaange"", 'type': 'LOVE'}], 'summary': {'total_count': 1}}, 'message': ""Mu'ad Xaange"", 'comment_count': 0}, {'id': '726381423030278_302977372812875', 'created_time': '2024-03-30T15:46:14+0000', 'reactions': {'data': [], 'summary': {'total_count': 0}}, 'message': 'Issack Ali hassan', 'comment_count': 0}, {'id': '726381423030278_2761204940694365', 'created_time': '2024-03-30T15:50:15+0000', 'reactions': {'data': [], 'summary': {'total_count': 0}}, 'message': 'bahja apd naasir muuse', 'comment_count': 0}, {'id': '726381423030278_1194552491991562', 'created_time': '2024-03-30T19:21:24+0000', 'reactions': {'data': [], 'summary': {'total_count': 0}}, 'message': 'Ibarahim Aljacfari', 'comment_count': 0}, {'id': '726381423030278_726186839691412', 'created_time': '2024-03-30T20:39:22+0000', 'reactions': {'data': [], 'summary': {'total_count': 0}}, 'message': 'Saluugla Hassan Biixi apply dhhe', 'comment_count': 0}, {'id': '726381423030278_444388684615445', 'created_time': '2024-03-30T20:49:47+0000', 'reactions': {'data': [], 'summary': {'total_count': 0}}, 'message': 'Wr somalia wxaan ahayn masoo gudbisan', 'comment_count': 0}, {'id': '726381423030278_1153199375693759', 'created_time': '2024-03-30T21:42:15+0000', 'reactions': {'data': [], 'summary': {'total_count': 0}}, 'message': 'Said Ciyoon boos adiga ah baa ku jira taliye', 'comment_count': 0}, {'id': '726381423030278_1356799048517717', 'created_time': '2024-03-30T21:46:54+0000', 'reactions': {'data': [], 'summary': {'total_count': 0}}, 'message': 'Mohamud Ciiro', 'comment_count': 0}, {'id': '726381423030278_1075330583542952', 'created_time': '2024-03-31T00:25:53+0000', 'reactions': {'data': [], 'summary': {'total_count': 0}}, 'message': 'Puntlnd maxaa ka hapary meshan😩', 'comment_count': 0}, {'id': '726381423030278_809485873882686', 'created_time': '2024-03-31T04:39:47+0000', 'reactions': {'data': [], 'summary': {'total_count': 0}}, 'message': 'Bile Shicinbir waxan waa shaqadadii', 'comment_count': 0}, {'id': '726381423030278_2163154464027200', 'created_time': '2024-03-31T06:53:58+0000', 'reactions': {'data': [], 'summary': {'total_count': 0}}, 'message': 'Beenta iska dhaafa waxaad si sharci dara ah u dhimeysaan shaqaalihii in mudo ah idiin shaqeynaayey', 'comment_count': 0}, {'id': '726381423030278_1458597454757353', 'created_time': '2024-03-31T11:02:14+0000', 'reactions': {'data': [], 'summary': {'total_count': 0}}, 'message': 'Dahir Shariif', 'comment_count': 0}, {'id': '726381423030278_1381398322539378', 'created_time': '2024-04-02T11:23:44+0000', 'reactions': {'data': [], 'summary': {'total_count': 0}}, 'message': 'Ibraahim Haajir muqdisho aya ku jirto', 'comment_count': 0}, {'id': '726381423030278_933785488238914', 'created_time': '2024-04-03T10:44:54+0000', 'reactions': {'data': [], 'summary': {'total_count': 0}}, 'message': 'Hamda A. Adam', 'comment_count': 0}, {'id': '726381423030278_1434383210499507', 'created_time': '2024-04-03T10:45:10+0000', 'reactions': {'data': [], 'summary': {'total_count': 0}}, 'message': ""Sa'da Abdullah"", 'comment_count': 0}]",16,,


In [41]:
print(df_posts.head())

print(df_comments.info())
print(df_comments.head())

print(df_reactions.info())
print(df_reactions.head())


                       id              created_time  \
0  1.9050161156995974e+29 2024-03-31 11:40:46+00:00   
1  1.9050161156995974e+29 2024-03-31 09:02:41+00:00   
2  1.9050161156995974e+29 2024-03-31 08:15:21+00:00   
3  1.9050161156995974e+29 2024-03-31 06:36:19+00:00   
4  1.9050161156995974e+29 2024-03-30 13:08:50+00:00   

                                                                                                             permalink_url  \
0  https://www.facebook.com/100069752805511/posts/pfbid029tNGn9xYEdpBps45PhzuPzSmecGLoRd9yAPejysbVoDmRR7F8f2Gfxf38VU8DnUrl   
1   https://www.facebook.com/100069752805511/posts/pfbid0RmfXb5oTPfA8LatCYi3PPKHvEHZzxKpoE86VpaHpdQsLRkjgfnQzzSdGjGmhaaMUl   
2  https://www.facebook.com/100069752805511/posts/pfbid02VFNiABu2fAYjDFsyQk9pg627tyx6iC3Sns1A9gBSM5mzkueUB8iakZVahBURCqUNl   
3   https://www.facebook.com/100069752805511/posts/pfbid0basJmPUp46cRs1Cd3urQxKit1RXQM7kpPbxLHVUUUmmjCemtKK4yycWW9ozJvX2Gl   
4   https://www.facebook.com/1000697528

In [42]:
# Assuming your DataFrame is named 'df' and the column of interest is 'text'
pd.set_option('display.max_colwidth', None)

# Display the 'text' column without truncation
print(df['message'])


0                                                                                                                                                                  Fursado Shaqo:\nTSFP/MCHN/BSFP Registration Clerk (26), Baidoa, Burhakaba, Berdale & Qansax Dheere, Somalia\nhttps://qaranjobs.com/job/tsfp-mchn-bsfp-registration-clerk-26-baidoa-burhakaba-berdale-qansax-dheere/ \n #qaranjobs #sos #FursadShaqo #somalijobs
1                                                                             𝟓 𝐅𝐮𝐫𝐬𝐚𝐝𝐨 𝐒𝐡𝐚𝐪𝐨 𝐀𝐡:\nJamhuuriyadda Federaalka Soomaaliya Waxaa Ka Banaan 5 Shaqo, Fadlan Link-ga hoose ka codso: 👇👇\n🔗𝑳𝒊𝒏𝒌: https://qaranjobs.com/job/somali-jobs-the-federal-republic-of-somalia-5-positions-are-closing-today-march-31st-2024/\nF.G: Fursadahan Shaqo Maanta Ayaa U Danbeysa Oo Ay Xirmayaan (31 Mar 2024). \n #qaranjobs #Somalia
2       Fursad Shaqo:\nDhamaan Dadwaynaha Ku Nool Deegaanada Koonfur Galbeed Soomaaliya Gaar Ahaan Magaalada Baydhabo Waxaa La' Ogeysiineynaa in ay Fursad Shaqo K

In [43]:
# Extract job titles from URLs
df_posts['job_title'] = df_posts['message'].str.extract(r'https://qaranjobs.com/job/(.*?)/')

# Display the resulting DataFrame
print(df_posts[['message', 'job_title']])

                                                                                                                                                                                                                                                                                                                                                                                                                         message  \
0                                                                                                                                                                Fursado Shaqo:\nTSFP/MCHN/BSFP Registration Clerk (26), Baidoa, Burhakaba, Berdale & Qansax Dheere, Somalia\nhttps://qaranjobs.com/job/tsfp-mchn-bsfp-registration-clerk-26-baidoa-burhakaba-berdale-qansax-dheere/ \n #qaranjobs #sos #FursadShaqo #somalijobs   
1                                                                           𝟓 𝐅𝐮𝐫𝐬𝐚𝐝𝐨 𝐒𝐡𝐚𝐪𝐨 𝐀𝐡:\nJamhuuriyadda Federaalka Soomaaliya Waxaa Ka Banaan 5 Shaqo, Fa

In [44]:
df_posts.head()

Unnamed: 0,id,created_time,permalink_url,full_picture,message,attachments,shares.count,reactions.data,reactions.summary.total_count,comments.data,comments.summary.total_count,shares,attachments.data,job_title
0,1.905016115699597e+29,2024-03-31 11:40:46+00:00,https://www.facebook.com/100069752805511/posts/pfbid029tNGn9xYEdpBps45PhzuPzSmecGLoRd9yAPejysbVoDmRR7F8f2Gfxf38VU8DnUrl,https://scontent.fmgq1-2.fna.fbcdn.net/v/t39.30808-6/434080331_726961826305571_8310852036318558766_n.jpg?_nc_cat=110&ccb=1-7&_nc_sid=5f2048&_nc_ohc=1tl241klzLYAX__666E&_nc_ht=scontent.fmgq1-2.fna&oh=00_AfCM8tnnOdS6Jlbu3vOx0orTG08J84UzL6bK1qoU747-YA&oe=66133975,"Fursado Shaqo:\nTSFP/MCHN/BSFP Registration Clerk (26), Baidoa, Burhakaba, Berdale & Qansax Dheere, Somalia\nhttps://qaranjobs.com/job/tsfp-mchn-bsfp-registration-clerk-26-baidoa-burhakaba-berdale-qansax-dheere/ \n #qaranjobs #sos #FursadShaqo #somalijobs",,3.0,[],30,"[{'id': '726961929638894_324291780293386', 'created_time': '2024-03-31T11:45:28+0000', 'reactions': {'data': [], 'summary': {'total_count': 0}}, 'message': 'Edinka waxa aad soo dhegtan meel laka codsado mawalahan Ee maxay tahay', 'comment_count': 0}, {'id': '726961929638894_385598347646729', 'created_time': '2024-03-31T11:56:24+0000', 'reactions': {'data': [], 'summary': {'total_count': 0}}, 'message': 'Se wax yeshen hr', 'comment_count': 0}, {'id': '726961929638894_434427659047806', 'created_time': '2024-03-31T11:56:27+0000', 'reactions': {'data': [], 'summary': {'total_count': 0}}, 'message': 'Hee', 'comment_count': 0}, {'id': '726961929638894_1129552994749831', 'created_time': '2024-03-31T12:03:27+0000', 'reactions': {'data': [], 'summary': {'total_count': 0}}, 'message': 'Mel laga applied malahan mexethy', 'comment_count': 0}, {'id': '726961929638894_616721103985469', 'created_time': '2024-03-31T12:26:15+0000', 'reactions': {'data': [], 'summary': {'total_count': 0}}, 'message': 'Email ka mesha ku qoran ku gudbi codsigaaga hadii aa goobta laka shaqa galaayo u dhowtahay benalayaal waaye wax ay online ku qaadayana malahaa', 'comment_count': 0}]",5,,,tsfp-mchn-bsfp-registration-clerk-26-baidoa-burhakaba-berdale-qansax-dheere
1,1.905016115699597e+29,2024-03-31 09:02:41+00:00,https://www.facebook.com/100069752805511/posts/pfbid0RmfXb5oTPfA8LatCYi3PPKHvEHZzxKpoE86VpaHpdQsLRkjgfnQzzSdGjGmhaaMUl,https://scontent.fmgq1-2.fna.fbcdn.net/v/t39.30808-6/433886106_726893962979024_3122369860527213223_n.jpg?_nc_cat=104&ccb=1-7&_nc_sid=5f2048&_nc_ohc=REyoC3f_PH4AX-p6gR_&_nc_ht=scontent.fmgq1-2.fna&oh=00_AfB376Cvxz4GDOFB467eBKcSbvUpmAVWBoGeEgoiEkHRXA&oe=661316D5,"𝟓 𝐅𝐮𝐫𝐬𝐚𝐝𝐨 𝐒𝐡𝐚𝐪𝐨 𝐀𝐡:\nJamhuuriyadda Federaalka Soomaaliya Waxaa Ka Banaan 5 Shaqo, Fadlan Link-ga hoose ka codso: 👇👇\n🔗𝑳𝒊𝒏𝒌: https://qaranjobs.com/job/somali-jobs-the-federal-republic-of-somalia-5-positions-are-closing-today-march-31st-2024/\nF.G: Fursadahan Shaqo Maanta Ayaa U Danbeysa Oo Ay Xirmayaan (31 Mar 2024). \n #qaranjobs #Somalia",,,[],28,"[{'id': '726894056312348_478021441447570', 'created_time': '2024-03-31T09:34:15+0000', 'reactions': {'data': [], 'summary': {'total_count': 0}}, 'message': 'waad soo dahiseyn sxbyaal', 'comment_count': 0}]",1,,,somali-jobs-the-federal-republic-of-somalia-5-positions-are-closing-today-march-31st-2024
2,1.905016115699597e+29,2024-03-31 08:15:21+00:00,https://www.facebook.com/100069752805511/posts/pfbid02VFNiABu2fAYjDFsyQk9pg627tyx6iC3Sns1A9gBSM5mzkueUB8iakZVahBURCqUNl,https://scontent.fmgq1-2.fna.fbcdn.net/v/t39.30808-6/433933765_726874939647593_6225378752850722358_n.jpg?_nc_cat=100&ccb=1-7&_nc_sid=5f2048&_nc_ohc=1qHLwpQYnC8AX8lhnvy&_nc_ht=scontent.fmgq1-2.fna&oh=00_AfB-tEBwnl0v0bi20Ph5Xf0SDqfiUBLjqjwWNVj8yIARBA&oe=661323A6,"Fursad Shaqo:\nDhamaan Dadwaynaha Ku Nool Deegaanada Koonfur Galbeed Soomaaliya Gaar Ahaan Magaalada Baydhabo Waxaa La' Ogeysiineynaa in ay Fursad Shaqo Ka Banantahay Mashruuca Somalia Urban Resilience Project (SURP II), oo ay fulinayso Dowlaha Hoose ee Baydhabo. Fadlan Link-ga hoose ka codso: 👇👇\nhttps://qaranjobs.com/job/financial-management-specialist-baidoa-somalia/ \n #qaranjobs #Somalijobs #somalijobs",,3.0,[],22,"[{'id': '726875066314247_433778902458181', 'created_time': '2024-03-31T08:30:50+0000', 'reactions': {'data': [], 'summary': {'total_count': 0}}, 'message': 'Ali Hassan Salat', 'comment_count': 1, 'comments': {'data': [{'created_time': '2024-04-02T04:49:01+0000', 'from': {'name': 'Ali Hassan Salat', 'id': '100039194043550'}, 'message': 'Mohamed Ahmed Hassan thanks wllka', 'id': '726875066314247_714105697589582'}]}}, {'id': '726875066314247_1877682789335640', 'created_time': '2024-03-31T10:34:23+0000', 'reactions': {'data': [{'id': '100017032722294', 'name': 'Abdirahman Sheikh Abdisalan', 'type': 'LIKE'}], 'summary': {'total_count': 1}}, 'message': 'Abdirahman Sheikh Abdisalan', 'comment_count': 0}, {'id': '726875066314247_953345846519658', 'created_time': '2024-04-01T15:07:18+0000', 'reactions': {'data': [], 'summary': {'total_count': 0}}, 'message': 'Abdi Mohamed Abdalle adigy kuu baahnyihiin dadkaan', 'comment_count': 0}]",3,,,financial-management-specialist-baidoa-somalia
3,1.905016115699597e+29,2024-03-31 06:36:19+00:00,https://www.facebook.com/100069752805511/posts/pfbid0basJmPUp46cRs1Cd3urQxKit1RXQM7kpPbxLHVUUUmmjCemtKK4yycWW9ozJvX2Gl,https://scontent.fmgq1-2.fna.fbcdn.net/v/t39.30808-6/434431744_726836226318131_8935310608416442472_n.jpg?_nc_cat=106&ccb=1-7&_nc_sid=5f2048&_nc_ohc=dXrs6KPFjX8AX_OuZ2o&_nc_ht=scontent.fmgq1-2.fna&oh=00_AfDdRAD-udC7QITleGB7j3uCnHh6aUEmiJwzi2y2utuD6Q&oe=66132471,"4 Fursado Shaqo Ah:\nSalaam Somali Bank Waxaa Ka Banaan 4 Shaqo, Fadlan Link-ga hoose ka codso: 👇👇\nhttps://qaranjobs.com/job/fursado-shaqo-oo-ka-banaan-salaam-somali-bank/ \nDeadline: 31 March 2024.\n #qaranjobs",,1.0,[],17,[],0,,,fursado-shaqo-oo-ka-banaan-salaam-somali-bank
4,1.905016115699597e+29,2024-03-30 13:08:50+00:00,https://www.facebook.com/100069752805511/posts/pfbid0d6fzYLHcTyBAZKrJLT5skjWv7yPeGi2XYNeDqQogesHDWqeih91MVE6HUaQyXEiel,https://scontent.fmgq1-2.fna.fbcdn.net/v/t45.1600-4/433679821_120208841026560759_4401921263084898502_n.jpg?_nc_cat=103&ccb=1-7&_nc_ohc=65sYSxm1OKwAX8v6zgK&_nc_ht=scontent.fmgq1-2.fna&stp=dst-emg0_q75&ur=5f2048&_nc_sid=64c8fc&oh=00_AfBh_wpI1lyC7fTeaI8OU6-zfc_0btJi1JcwLp5Hi0prbw&oe=66131A63,"6 Fursado Shaqo Ah:\nHay'adda Save the Children Waxaa Ka Banaan 6 Shaqo, Fadlan Link-ga hoose ka codso: 👇👇\nhttps://qaranjobs.com/job/somali-jobs-at-save-the-children-6-positions/ \n #qaranjobs",,9.0,[],161,"[{'id': '726381423030278_1141881690573753', 'created_time': '2024-03-30T15:12:19+0000', 'reactions': {'data': [], 'summary': {'total_count': 0}}, 'message': 'Siciid Ck Allaale', 'comment_count': 0}, {'id': '726381423030278_381699048107054', 'created_time': '2024-03-30T15:30:23+0000', 'reactions': {'data': [{'id': '100013337285994', 'name': ""Mu'ad Xaange"", 'type': 'LOVE'}], 'summary': {'total_count': 1}}, 'message': ""Mu'ad Xaange"", 'comment_count': 0}, {'id': '726381423030278_302977372812875', 'created_time': '2024-03-30T15:46:14+0000', 'reactions': {'data': [], 'summary': {'total_count': 0}}, 'message': 'Issack Ali hassan', 'comment_count': 0}, {'id': '726381423030278_2761204940694365', 'created_time': '2024-03-30T15:50:15+0000', 'reactions': {'data': [], 'summary': {'total_count': 0}}, 'message': 'bahja apd naasir muuse', 'comment_count': 0}, {'id': '726381423030278_1194552491991562', 'created_time': '2024-03-30T19:21:24+0000', 'reactions': {'data': [], 'summary': {'total_count': 0}}, 'message': 'Ibarahim Aljacfari', 'comment_count': 0}, {'id': '726381423030278_726186839691412', 'created_time': '2024-03-30T20:39:22+0000', 'reactions': {'data': [], 'summary': {'total_count': 0}}, 'message': 'Saluugla Hassan Biixi apply dhhe', 'comment_count': 0}, {'id': '726381423030278_444388684615445', 'created_time': '2024-03-30T20:49:47+0000', 'reactions': {'data': [], 'summary': {'total_count': 0}}, 'message': 'Wr somalia wxaan ahayn masoo gudbisan', 'comment_count': 0}, {'id': '726381423030278_1153199375693759', 'created_time': '2024-03-30T21:42:15+0000', 'reactions': {'data': [], 'summary': {'total_count': 0}}, 'message': 'Said Ciyoon boos adiga ah baa ku jira taliye', 'comment_count': 0}, {'id': '726381423030278_1356799048517717', 'created_time': '2024-03-30T21:46:54+0000', 'reactions': {'data': [], 'summary': {'total_count': 0}}, 'message': 'Mohamud Ciiro', 'comment_count': 0}, {'id': '726381423030278_1075330583542952', 'created_time': '2024-03-31T00:25:53+0000', 'reactions': {'data': [], 'summary': {'total_count': 0}}, 'message': 'Puntlnd maxaa ka hapary meshan😩', 'comment_count': 0}, {'id': '726381423030278_809485873882686', 'created_time': '2024-03-31T04:39:47+0000', 'reactions': {'data': [], 'summary': {'total_count': 0}}, 'message': 'Bile Shicinbir waxan waa shaqadadii', 'comment_count': 0}, {'id': '726381423030278_2163154464027200', 'created_time': '2024-03-31T06:53:58+0000', 'reactions': {'data': [], 'summary': {'total_count': 0}}, 'message': 'Beenta iska dhaafa waxaad si sharci dara ah u dhimeysaan shaqaalihii in mudo ah idiin shaqeynaayey', 'comment_count': 0}, {'id': '726381423030278_1458597454757353', 'created_time': '2024-03-31T11:02:14+0000', 'reactions': {'data': [], 'summary': {'total_count': 0}}, 'message': 'Dahir Shariif', 'comment_count': 0}, {'id': '726381423030278_1381398322539378', 'created_time': '2024-04-02T11:23:44+0000', 'reactions': {'data': [], 'summary': {'total_count': 0}}, 'message': 'Ibraahim Haajir muqdisho aya ku jirto', 'comment_count': 0}, {'id': '726381423030278_933785488238914', 'created_time': '2024-04-03T10:44:54+0000', 'reactions': {'data': [], 'summary': {'total_count': 0}}, 'message': 'Hamda A. Adam', 'comment_count': 0}, {'id': '726381423030278_1434383210499507', 'created_time': '2024-04-03T10:45:10+0000', 'reactions': {'data': [], 'summary': {'total_count': 0}}, 'message': ""Sa'da Abdullah"", 'comment_count': 0}]",16,,,somali-jobs-at-save-the-children-6-positions


In [45]:
# Remove hyphens from job titles
df_posts['cleaned_job_title'] = df_posts['job_title'].str.replace('-', ' ')

# Display the resulting DataFrame
print(df_posts[['job_title', 'cleaned_job_title']])

                                                                                      job_title  \
0                   tsfp-mchn-bsfp-registration-clerk-26-baidoa-burhakaba-berdale-qansax-dheere   
1     somali-jobs-the-federal-republic-of-somalia-5-positions-are-closing-today-march-31st-2024   
2                                                financial-management-specialist-baidoa-somalia   
3                                                 fursado-shaqo-oo-ka-banaan-salaam-somali-bank   
4                                                  somali-jobs-at-save-the-children-6-positions   
...                                                                                         ...   
5070                                                        community-health-worker-4-positions   
5071                                                          dispenser-storekeeper-4-positions   
5072                                                              qualified-midwife-6-positions   
5073      

In [46]:
#df_posts.to_csv('df_posts5075.csv')

In [47]:
#df_comments.to_csv('df_Comments.csv')
#df_reactions.to_csv('df_reactions.csv')

PermissionError: [Errno 13] Permission denied: 'df_Comments.csv'

In [52]:
# Assuming 'from_id' is the column that contains the user IDs
if 'id' in df_comments.columns:
    # Count the number of comments each user has made
    most_commented_user = df_comments['id'].value_counts().idxmax()

    # Get the count of comments for the most commented user
    most_comments_count = df_comments['id'].value_counts().max()

    print("Most commented user ID:", most_commented_user)
    print("Number of comments by the most active user:", most_comments_count)
else:
    print("The column 'from_id' does not exist in df_comments.")


Most commented user ID: 726961929638894_324291780293386
Number of comments by the most active user: 1


In [57]:
# Assuming 'id' is the column that contains the user IDs
if 'id' in df_reactions.columns:
    # Count the number of reactions each user has made
    most_liked_user = df_reactions['id'].value_counts().idxmax()

    # Get the count of reactions for the most liked user
    most_liked_count = df_reactions['id'].value_counts().max()

    print("Most liked user ID:", most_liked_user)
    print("Number of likes by the most active user:", most_liked_count)
else:
    print("The column 'id' does not exist in df_reactions.")


Most liked user ID: 100031854677787
Number of likes by the most active user: 3


In [56]:
df_reactions['id'].value_counts()

100031854677787    3
100006321875269    3
100007234114604    3
100043314547609    2
100003900784025    2
                  ..
100007554103669    1
100002571983051    1
100072801791811    1
100003469537938    1
100002980190175    1
Name: id, Length: 734, dtype: int64