In [1]:
from cloudpathlib import AnyPath

#root_path = AnyPath("/home/john/projects/labcats/new_process_data")
root_path = AnyPath("gs://categorum-test/hyperlocal")

data_root = root_path / "data"

import app.file_utils as fu
from importlib import reload
reload(fu)

fu.maybe_mkdir(data_root)

target_locations = ['Peckham','Gorton and Denton', 'Kensington and Bayswater', 'Bolsover','Makerfield']

In [2]:
import app.process_week as pw
import elasticsearch
import os
import dotenv
import app.posts_and_comments as pacs
import app.file_utils as fu
from cloudpathlib import AnyPath

from datetime import timezone, timedelta

dotenv.load_dotenv()

print(f"Elasticsearch client version: {elasticsearch.__version__}")

es = elasticsearch.Elasticsearch(
    cloud_id=os.getenv("ES_CLOUD_ID"),
    api_key=os.getenv("API_KEY"),
)
print("Created basic ES client")

start_dt, end_dt = pw.get_most_recent_week(tz=timezone.utc)

files_need_downloading = False

for location in target_locations:
    fout = fu.file_name_to_slug(data_root / f"posts_{location}_{start_dt.strftime('%Y-%m-%d')}_to_{end_dt.strftime('%Y-%m-%d')}.feather")
    if not AnyPath(fout).exists():
        files_need_downloading = True
    else:
        print (f"File {fout} already exists")

if files_need_downloading:
    print (start_dt, end_dt)
    posts_df = pw.download_posts_for_period(es, "dalmation-fb-posts", start_dt, end_dt+timedelta(hours=5), target_locations=target_locations)
    comments_df = pw.download_posts_for_period(es, "dalmation-fb-comments", start_dt, end_dt)

    print (f"Downloaded {len(comments_df)} comments and {len(posts_df)} posts")
    df_all_posts_with_top_n_comments = pacs.get_posts_with_top_n_comments(posts_df, comments_df, n_comments=10, min_comments=1)
    

    for location in target_locations:
        df_location = df_all_posts_with_top_n_comments[df_all_posts_with_top_n_comments['tags.location'] == location]
        fout = fu.file_name_to_slug(data_root / f"posts_{location}_{start_dt.strftime('%Y-%m-%d')}_to_{end_dt.strftime('%Y-%m-%d')}.feather")
        fu.write_feather_to_anypath(df_location, fout)


Elasticsearch client version: (9, 3, 0)
Created basic ES client
File gs://categorum-test/hyperlocal/data/posts_peckham_2026-02-04_to_2026-02-11.feather already exists
File gs://categorum-test/hyperlocal/data/posts_gorton-and-denton_2026-02-04_to_2026-02-11.feather already exists
File gs://categorum-test/hyperlocal/data/posts_kensington-and-bayswater_2026-02-04_to_2026-02-11.feather already exists
File gs://categorum-test/hyperlocal/data/posts_bolsover_2026-02-04_to_2026-02-11.feather already exists
File gs://categorum-test/hyperlocal/data/posts_makerfield_2026-02-04_to_2026-02-11.feather already exists


In [3]:
import app.post_processing as pp
import app.file_utils as fu
from importlib import reload
reload(pp)


for location in['Bolsover',]:
    location_slug = fu.file_name_to_slug(location)
    input_path = data_root / f"posts_{location_slug}_{start_dt.strftime('%Y-%m-%d')}_to_{end_dt.strftime('%Y-%m-%d')}.feather"
    pipeline = pp.PostProcessingPipeline(
        categories_path=root_path/"categories_to_study.json", 
        tags_path=data_root/"tags"/location_slug,
        intermediary_path=AnyPath("/home/john/projects/hyperlocal/junk"),
    )
    processed_df = pipeline.process(input_path,save_intermediary_files=True)
    output_filename = f"processed_{location_slug}_{start_dt.strftime('%Y-%m-%d')}_to_{end_dt.strftime('%Y-%m-%d')}.feather"
    fu.write_feather_to_anypath(processed_df, data_root/output_filename)


[2026-02-12T14:59:36] INFO app.post_processing: Stage 1: detected existing output, skipping | extra={'path': '/home/john/projects/hyperlocal/junk/posts_bolsover_2026-02-04_to_2026-02-11-stage1.feather'}
[2026-02-12T14:59:36] INFO app.post_processing: Filtered to local-political rows for stages 2+ | extra={'total': 410, 'political': 101}
[2026-02-12T14:59:36] INFO app.post_processing: Stage 2: detected existing output, skipping | extra={'path': '/home/john/projects/hyperlocal/junk/posts_bolsover_2026-02-04_to_2026-02-11-stage2.feather'}
[2026-02-12T14:59:36] INFO app.post_processing: Stage 3: starting
[2026-02-12T14:59:36] INFO app.post_processing: Stage 3: starting TagManager | extra={'model': 'google/gemini-2.5-flash', 'tags_path': GSPath('gs://categorum-test/hyperlocal/data/tags/bolsover')}
[2026-02-12T14:59:37] INFO app.tag_manager: TagManager initialized | extra={'path': 'gs://categorum-test/hyperlocal/data/tags/bolsover/tag_record.csv'}
[2026-02-12T14:59:37] INFO app.post_processi

  from .autonotebook import tqdm as notebook_tqdm


[2026-02-12T15:00:05] INFO app.post_processing: Stage 3: processing main_category subset | extra={'main_category': 'Public services', 'rows': 51}
[2026-02-12T15:02:06] INFO app.post_processing: Stage 3: processing main_category subset | extra={'main_category': 'Education', 'rows': 3}
[2026-02-12T15:02:08] INFO app.post_processing: Stage 3: processing main_category subset | extra={'main_category': 'Housing', 'rows': 8}
[2026-02-12T15:02:14] INFO app.post_processing: Stage 3: processing main_category subset | extra={'main_category': 'Businesses', 'rows': 2}
[2026-02-12T15:02:37] INFO app.post_processing: Stage 3: processing main_category subset | extra={'main_category': 'Economy', 'rows': 4}
[2026-02-12T15:03:04] INFO app.post_processing: Stage 3: processing main_category subset | extra={'main_category': 'Health', 'rows': 2}
[2026-02-12T15:03:05] INFO app.post_processing: Stage 3: processing main_category subset | extra={'main_category': 'Crime', 'rows': 10}
[2026-02-12T15:03:12] INFO ap

In [4]:
pipeline._df_stage1.columns


Index(['id', 'timestamp', 'url', 'urls', 'domains', 'type', 'body',
       'is_sponsored', 'comments', 'shares', 'likes', 'views', 'hashtags',
       'profile_id', 'delegate_page_id', 'attachments', 'location',
       'scraped_at', 'platform', 'post.id', 'post.url', 'group.id',
       'group.name', 'group.url', 'group.category', 'group.description',
       'group.members', 'group.created_at', 'user.url', 'user.name',
       'user.is_verified', 'tags.region', 'tags.location', 'tags.mp_name',
       'shared_post_id', 'comment_texts', 'is_localpolitical'],
      dtype='str')

In [5]:

pipeline._df_stage2.to_csv("../junk/hmm_bolsover.csv",index=False)


In [None]:
pipeline._df_stage2.categor

category
Public services.Roads & Infrastructure           24
Public services.Waste & Sanitation                8
Environmental.Biodiversity & Animal Welfare       7
Public services.Transport & Transit               6
Public services.Social Care                       5
Crime.Road & Traffic Offenses                     4
Environmental.Waste Management & Recycling        3
Public services.Council Governance                3
Housing.Tenancy & Landlord Relations              3
Crime.Property Offenses                           3
Education.Governance & Policy                     2
Businesses.Employment & Promotion                 2
Environmental.Nuisance & Environmental Health     2
Economy.Retail & High Street                      2
Housing.Homelessness & Welfare                    2
Economy.Employment & Labor                        2
Environmental.Water Management & Pollution        2
Public services.Postal & Delivery                 2
Housing.Construction Standards                    1
Env

In [10]:
import app.tag_manager as tm
from importlib import reload
reload(tm)
df_merged = tm.merge_tags(pipeline._df_stage3,data_root / "tags" / location_slug)
df_merged.tag.value_counts()[:20]


[2026-02-12T15:07:28] INFO app.tag_manager: TagManager initialized | extra={'path': 'gs://categorum-test/hyperlocal/data/tags/bolsover/tag_record.csv'}
[2026-02-12T15:07:28] DEBUG app.tag_manager: Saving tag record | extra={'path': 'gs://categorum-test/hyperlocal/data/tags/bolsover/tag_record.csv', 'rows': 114}


tag
simultaneous roadworks                  6
Wandering horses                        4
food waste collection scheme            4
Dog attacks on livestock                3
illegal parking enforcement             2
Housing exchange difficulties           2
Suspicious door-to-door activity        2
temporary traffic light malfunction     2
Flooding and new housing development    2
HGV diversion enforcement               2
bus route disruption                    2
pothole repair inadequacy               1
pothole prevalence                      1
pothole repair delays                   1
internet safety education               1
care home closures                      1
Housing development snagging            1
Facebook group advertising rules        1
Noise pollution from gates and bins     1
Solar farm development impact           1
Name: count, dtype: int64

In [8]:
import pandas as pd
df_final = pd.read_feather('/home/john/projects/hyperlocal/junk/processed_bolsover_2026-02-04_to_2026-02-11.feather')

FileNotFoundError: [Errno 2] No such file or directory: '/home/john/projects/hyperlocal/junk/processed_bolsover_2026-02-04_to_2026-02-11.feather'

In [None]:
import app.tag_manager as tm
from importlib import reload
reload(tm)
df_merged = tm.merge_tags(df_final,data_root / "tags" / location_slug)

[2026-02-12T12:35:30] INFO app.tag_manager: TagManager initialized | extra={'path': 'gs://categorum-test/hyperlocal/data/tags/bolsover/tag_record.csv'}
[2026-02-12T12:35:30] DEBUG app.tag_manager: Saving tag record | extra={'path': 'gs://categorum-test/hyperlocal/data/tags/bolsover/tag_record.csv', 'rows': 47}


In [None]:
df_merged[df_merged.tag=='Roadworks and traffic disruption']

Unnamed: 0,id,timestamp,url,urls,domains,type,body,is_sponsored,comments,shares,...,tags.mp_name,shared_post_id,comment_texts,is_localpolitical,category,main_category,sub_category,tag_id,tag,tag_description
16,1555718512212499,2026-02-09T18:25:17+00:00,https://www.facebook.com/groups/65394538572315...,[],,Post,Toby Perkins MP Louise Sandher-Jones MP Rache...,False,23,6,...,,,[I think you might need to tag the Reform led ...,True,Non-specific.Non-specific,Non-specific,Non-specific,8.0,Roadworks and traffic disruption,The community is experiencing severe traffic c...
51,10162848402452807,2026-02-09T18:38:54+00:00,https://www.facebook.com/groups/33274397806/po...,[],,Post,*** FOR ANYONE USING j29a M1 junction *** Plea...,False,4,0,...,,4493025831022982.0,[Contact your local councillor dawn abbot on F...,True,Non-specific.Non-specific,Non-specific,Non-specific,8.0,Roadworks and traffic disruption,The community is experiencing severe traffic c...
55,10162840293832807,2026-02-07T21:09:38+00:00,https://www.facebook.com/groups/33274397806/po...,[],,Post,Report gone into National Grid for contractors...,False,3,0,...,,,"[Flash your lights n they change!, Are they bo...",True,Non-specific.Non-specific,Non-specific,Non-specific,8.0,Roadworks and traffic disruption,The community is experiencing severe traffic c...
68,3752376801571073,2026-02-05T09:58:46+00:00,https://www.facebook.com/groups/19622230672531...,[],,Post,You can‚Äôt park that on the trail üòÉ\nYou can‚Äôt ...,False,8,0,...,,,"[looks like he having a..... ..., It‚Äôs not con...",True,Non-specific.Non-specific,Non-specific,Non-specific,8.0,Roadworks and traffic disruption,The community is experiencing severe traffic c...
71,4497654170560148,2026-02-09T18:28:50+00:00,https://www.facebook.com/groups/27652479738007...,[],,Post,Total bedlam! This is Duckmanton at 16:50\nTot...,False,11,12,...,,,"[It was the same this morning. Madness., Just ...",True,Non-specific.Non-specific,Non-specific,Non-specific,8.0,Roadworks and traffic disruption,The community is experiencing severe traffic c...
72,4497655440560021,2026-02-09T18:28:44+00:00,https://www.facebook.com/groups/27652479738007...,[],,Post,Duckmanton gridlock\nDuckmanton gridlock,False,29,24,...,,,[Contact your local councillor \nDAWN ABBOT RE...,True,Non-specific.Non-specific,Non-specific,Non-specific,8.0,Roadworks and traffic disruption,The community is experiencing severe traffic c...
73,4497678670557698,2026-02-09T18:28:13+00:00,https://www.facebook.com/groups/27652479738007...,[],,Post,"Absolutely mental at markam vale roundabout, a...",False,97,35,...,,,[The ambulance isn‚Äôt stuck because of the road...,True,Non-specific.Non-specific,Non-specific,Non-specific,8.0,Roadworks and traffic disruption,The community is experiencing severe traffic c...
75,4497689240556641,2026-02-09T18:28:05+00:00,https://www.facebook.com/groups/27652479738007...,[],,Post,Has there been an accident in the village? Bus...,False,15,0,...,,,"[No. Just moronic road repair planning, Appall...",True,Non-specific.Non-specific,Non-specific,Non-specific,8.0,Roadworks and traffic disruption,The community is experiencing severe traffic c...
76,4497709007221331,2026-02-09T18:27:58+00:00,https://www.facebook.com/groups/27652479738007...,[],,Post,Who every has agreed to road works on Erin Roa...,False,15,0,...,,,[And inkersall road closed to the council real...,True,Non-specific.Non-specific,Non-specific,Non-specific,8.0,Roadworks and traffic disruption,The community is experiencing severe traffic c...
77,4497263243932574,2026-02-09T09:09:35+00:00,https://www.facebook.com/groups/27652479738007...,[],,Post,Just a warning the 3 way lights on Erin road /...,False,17,5,...,,,"[Must be sorted now, came through about half a...",True,Non-specific.Non-specific,Non-specific,Non-specific,8.0,Roadworks and traffic disruption,The community is experiencing severe traffic c...


In [None]:
df_merged.category.value_counts()

category
Non-specific.Non-specific                        88
Environmental.Nuisance & Environmental Health     2
Public services.Waste & Sanitation                2
Public services.Roads & Infrastructure            2
Public services.Social Care                       1
Environmental.Biodiversity & Animal Welfare       1
Health.Systemic Policy                            1
Public services.Other Public Services             1
Democracy.Local Representation                    1
Environmental.Local Government Maintenance        1
Crime.Road & Traffic Offenses                     1
Public services.Transport & Transit               1
Name: count, dtype: int64

In [15]:
df_stage_2 = pipeline._df_stage2
df_stage_2.category.value_counts()

df_pubser = df_stage_2[df_stage_2.main_category == 'Public services'].copy()




In [18]:
import tagging_posts as tp
reload(tp)

df_records =  pd.DataFrame(columns=['tag','tag_description'])

df_response = tp.tag_with_existing(df_pubser,df_records,context='Salient local issues',context_description='Locally salient public services issues in Bolsover',response_column='tag',response_column_description='tag_description')



In [33]:
reload(tp)
response_df = df_response[0]
new_records_df = tp.generate_new_tags(response_df[response_df.tag=='No tag'],df_records,context='Salient local issues',context_description='Locally salient public services issues in Bolsover',response_column='tag',response_column_description='tag_description')


In [35]:
new_records_df

Unnamed: 0,tag,tag_description
0,Potholes and Road Condition,"Discussions about potholes, poor road surfaces..."
1,Roadworks and Traffic Jams,"Posts regarding road closures, temporary traff..."
2,Bin Collections and Waste,"Issues related to bin collections, new food wa..."
3,Public Transport,"Concerns and questions about local bus routes,..."
4,Parking Problems,"Complaints about illegal, dangerous, or incons..."
5,Social and Elderly Care,"Discussions about care homes, home care, and s..."
6,Council Performance,"Critiques of council spending, decisions, and ..."
7,Dog Fouling,Complaints about dog mess not being cleaned up...
8,Postal and Courier Services,"Issues with Royal Mail, local couriers, and pa..."
9,Internet Outages,Reports of local internet and broadband servic...


In [None]:
import app.tagging_posts as tp
import app.tag_manager as tm
from importlib import reload
reload(tp)
reload(tm)

# --- Config ---
main_category = 'Public services'
context = 'locally salient issue'
ctx_desc = f'Local political/community issues ‚Äî locally salient {main_category} issue in {location_slug}'
avoid = 'local, locally salient, issue'

# --- 1. Get existing tags from TagManager ---
with tm.TagManager(tags_path=data_root / "tags" / location_slug) as tag_mgr:
    tags_record_df = tag_mgr.get_tags_for_category(main_category)[["tag", "tag_description"]].copy()

print(f"Existing tags for '{main_category}': {len(tags_record_df)}")
display(tags_record_df)

# --- 2. Tag with existing tags only ---
df_subset = pipeline._df_stage2[pipeline._df_stage2.main_category == main_category].copy()
print(f"\nPosts for '{main_category}': {len(df_subset)}")

tagged_df, updated_tags_df = tp.tag_with_existing(
    df_subset,
    tags_record_df,
    context=context,
    context_description=ctx_desc,
    avoid=avoid,
    more_specific_than_column='category',
    response_column='tag',
    response_column_description='tag_description',
)

n_tagged = (tagged_df['tag'] != 'No tag').sum()
n_untagged = (tagged_df['tag'] == 'No tag').sum()
print(f"\nAfter tag_with_existing: {n_tagged} tagged, {n_untagged} untagged")

# --- 3. Generate new tags from untagged posts ---
expanded_tags_df = tp.generate_new_tags(
    tagged_df,
    updated_tags_df,
    context=context,
    context_description=ctx_desc,
    avoid=avoid,
    response_column='tag',
    response_column_description='tag_description',
    verbose=True,
)
print(f"\nTags after generate_new_tags: {len(expanded_tags_df)}")
display(expanded_tags_df)

# --- 4. Re-tag only the still-untagged posts with the full tag list ---
still_untagged = tagged_df[tagged_df['tag'] == 'No tag'].copy()
if not still_untagged.empty:
    # Drop stale "No tag" values so iterate_tagging_posts_sequentially starts fresh
    still_untagged = still_untagged.drop(columns=['tag', 'tag_description'], errors='ignore')

    final_untagged_df, final_tags_df = tp.iterate_tagging_posts_sequentially(
        still_untagged,
        tags_record_df=expanded_tags_df,
        context=context,
        context_description=ctx_desc,
        avoid=avoid,
        more_specific_than_column='category',
        response_column='tag',
        response_column_description='tag_description',
    )
    # Combine: already-tagged rows + newly-tagged rows
    already_tagged = tagged_df[tagged_df['tag'] != 'No tag']
    combined_df = pd.concat([already_tagged, final_untagged_df]).sort_index()
else:
    combined_df = tagged_df
    final_tags_df = expanded_tags_df

# --- 5. Ingest tags into TagManager and assign tag_ids ---
with tm.TagManager(tags_path=data_root / "tags" / location_slug) as tag_mgr:
    for idx, row in combined_df.iterrows():
        tag = row.get("tag")
        desc = row.get("tag_description", "")
        if pd.isna(tag) or str(tag).strip() in ("", "No tag"):
            continue

        existing_mask = tag_mgr.df["tag"] == tag
        if existing_mask.any():
            tag_id = int(tag_mgr.df.loc[existing_mask, "id"].iloc[0])
            tag_mgr.update_tag(tag_id, tag, main_category, desc)
        else:
            tag_id = tag_mgr.add_new_tag(tag, main_category, desc)
        combined_df.at[idx, "tag_id"] = tag_id

    tag_mgr.save()

print(f"\nFinal results:")
print(f"  Total posts: {len(combined_df)}")
print(f"  Posts with tag_id: {combined_df['tag_id'].notna().sum()}")
print(f"  Tags in TagManager: {len(tag_mgr.df)}")
print(f"\nTag distribution:")
print(combined_df['tag'].value_counts())

In [36]:
import app.tagging_posts as tp
import app.tag_manager as tm
from importlib import reload
reload(tp)
reload(tm)

# --- Config ---
main_category = 'Public services'
context = 'locally salient issue'
ctx_desc = f'Local political/community issues ‚Äî locally salient {main_category} issue in {location_slug}'
avoid = 'local, locally salient, issue'

# --- 1. Get existing tags from TagManager ---
with tm.TagManager(tags_path=data_root / "tags" / location_slug) as tag_mgr:
    tags_record_df = tag_mgr.get_tags_for_category(main_category)[["tag", "tag_description"]].copy()

print(f"Existing tags for '{main_category}': {len(tags_record_df)}")
display(tags_record_df)

# --- 2. Tag with existing tags only ---
df_subset = pipeline._df_stage2[pipeline._df_stage2.main_category == main_category].copy()
print(f"\nPosts for '{main_category}': {len(df_subset)}")

tagged_df, updated_tags_df = tp.tag_with_existing(
    df_subset,
    tags_record_df,
    context=context,
    context_description=ctx_desc,
    avoid=avoid,
    more_specific_than_column='category',
    response_column='tag',
    response_column_description='tag_description',
)

n_tagged = (tagged_df['tag'] != 'No tag').sum()
n_untagged = (tagged_df['tag'] == 'No tag').sum()
print(f"\nAfter tag_with_existing: {n_tagged} tagged, {n_untagged} untagged")

# --- 3. Generate new tags from untagged posts ---
expanded_tags_df = tp.generate_new_tags(
    tagged_df,
    updated_tags_df,
    context=context,
    context_description=ctx_desc,
    avoid=avoid,
    response_column='tag',
    response_column_description='tag_description',
    verbose=True,
)
print(f"\nTags after generate_new_tags: {len(expanded_tags_df)}")
display(expanded_tags_df)

# --- 4. Re-tag only the still-untagged posts with the full tag list ---
still_untagged = tagged_df[tagged_df['tag'] == 'No tag'].copy()
if not still_untagged.empty:
    # Drop the 'tag'/'tag_description' cols so iterate doesn't see stale "No tag" values
    still_untagged = still_untagged.drop(columns=['tag', 'tag_description'], errors='ignore')

    final_untagged_df, final_tags_df = tp.iterate_tagging_posts_sequentially(
        still_untagged,
        tags_record_df=expanded_tags_df,
        context=context,
        context_description=ctx_desc,
        avoid=avoid,
        more_specific_than_column='category',
        response_column='tag',
        response_column_description='tag_description',
    )
    # Combine: already-tagged rows + newly-tagged rows
    already_tagged = tagged_df[tagged_df['tag'] != 'No tag']
    combined_df = pd.concat([already_tagged, final_untagged_df]).sort_index()
else:
    combined_df = tagged_df
    final_tags_df = expanded_tags_df

print(f"\nFinal results:")
print(f"  Total posts: {len(combined_df)}")
print(f"  Total tags in record: {len(final_tags_df)}")
print(f"\nTag distribution:")
print(combined_df['tag'].value_counts())

[2026-02-12T17:37:01] INFO app.tag_manager: TagManager initialized | extra={'path': 'gs://categorum-test/hyperlocal/data/tags/bolsover/tag_record.csv'}
[2026-02-12T17:37:01] DEBUG app.tag_manager: Saving tag record | extra={'path': 'gs://categorum-test/hyperlocal/data/tags/bolsover/tag_record.csv', 'rows': 1}
Existing tags for 'Public services': 0


Unnamed: 0,tag,tag_description



Posts for 'Public services': 51

After tag_with_existing: 47 tagged, 4 untagged
[2026-02-12T17:40:27] DEBUG app.tagging_posts: generate_new_tags prompt | extra={'prompt': 'You are a helpful assistant that generates new tags for the **locally salient issue** category (Local political/community issues ‚Äî locally salient Public services issue in bolsover).\n\nBelow is a list of posts and their comments that could NOT be tagged with any existing tag.\nThere is also a list of existing tags and their descriptions for reference.\n\nYour task:\n1. Read through the untagged posts and identify new, distinct topics/entities/issues.\n2. Generate **exactly 10** new tags (and short des'}
[2026-02-12T17:40:53] DEBUG app.ai_wrapper: Response: ChatCompletion(id='gen-1770918027-oNmznqosL1ZbRxNxmjXD', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='```json\n[\n    {\n        "tag": "Potholes and Road Repair",\n        "tag_description": "Discussions 

Unnamed: 0,tag,tag_description,tag_id
0,Potholes and Road Repair,"Discussions about the poor condition of roads,...",
1,Roadworks and Traffic,"Concerns regarding traffic gridlock, road clos...",
2,Food Waste Bins,Posts about the introduction of the new food w...,
3,Public Transport,"Issues related to bus routes, schedules, and s...",
4,Parking Problems,"Complaints about illegal parking, obstructions...",
5,Social Care and Accessibility,"Topics concerning care homes, and services for...",
6,Council Spending,"Scrutiny of council expenditure, financial dec...",
7,Dog Fouling,Complaints about dog owners not cleaning up af...,
8,Postal and Parcel Services,"Issues with mail delivery, courier services, a...",
9,Waste and Bin Collection,"Concerns about general rubbish removal, missed...",



Final results:
  Total posts: 51
  Total tags in record: 13

Tag distribution:
tag
Wheelchair Transport                    1
Vehicle Parts for Disabled Drivers      1
Parking Problems                        1
Temporary Traffic Lights Malfunction    1
Name: count, dtype: int64


In [37]:
combined_df

Unnamed: 0,id,timestamp,url,urls,domains,type,body,is_sponsored,comments,shares,...,tags.location,tags.mp_name,shared_post_id,comment_texts,is_localpolitical,category,main_category,sub_category,tag,tag_description
32,1426202935865470,2026-02-04T14:44:06+00:00,https://www.facebook.com/groups/51006219414622...,[],,Post,Update on the pot hole on station road,False,14,1,...,Bolsover,,,[What crap the road is unsafe who inspected it...,True,Public services.Roads & Infrastructure,Public services,Roads & Infrastructure,,
191,1636432247373326,2026-02-06T09:45:29+00:00,https://www.facebook.com/groups/42807144487608...,[],,Post,Bad start to the morning- if anyone is going t...,False,69,16,...,Bolsover,,,[We are like a Third World country. \nSpending...,True,Public services.Roads & Infrastructure,Public services,Roads & Infrastructure,,
192,1635170477499503,2026-02-04T22:59:15+00:00,https://www.facebook.com/groups/42807144487608...,[],,Post,Does anyone know how long we have to wait befo...,False,4,1,...,Bolsover,,,[If you report them via the website they ‚Äúrepa...,True,Public services.Roads & Infrastructure,Public services,Roads & Infrastructure,,
220,1551161216001562,2026-02-04T11:34:00+00:00,https://www.facebook.com/groups/65394538572315...,[https://www.derbyshiretimes.co.uk/news/people...,[https://www.derbyshiretimes.co.uk],Post,https://www.derbyshiretimes.co.uk/news/people/...,False,2,0,...,Bolsover,,,[I hope its not too late. Not everybody can be...,True,Public services.Social Care,Public services,Social Care,,
287,1310427724229586,2026-02-09T21:18:02+00:00,https://www.facebook.com/groups/10474520305271...,[],,Post,LOL has anyone else noticed that the blue Mini...,False,17,0,...,Bolsover,,,"[It‚Äôll be fake, owner is a twat, Needs one eve...",True,Public services.Transport & Transit,Public services,Transport & Transit,,
341,4497661323892766,2026-02-09T18:28:19+00:00,https://www.facebook.com/groups/27652479738007...,[],,Post,Absolute carnage in village with traffic this ...,False,18,0,...,Bolsover,,,[God forbid people want to park outside their ...,True,Public services.Roads & Infrastructure,Public services,Roads & Infrastructure,,
357,26888984377368212,2026-02-10T07:52:47+00:00,https://www.facebook.com/groups/bragderbyshire...,[],,Post,Due to being disabled i have not been out of m...,False,19,2,...,Bolsover,,,[I‚Äôm happy to come and do it mate üëç drop me a ...,True,Public services.Social Care,Public services,Social Care,,
358,26888454087421241,2026-02-09T21:11:41+00:00,https://www.facebook.com/groups/bragderbyshire...,[],,Post,Does anyone know if there‚Äôs any public transpo...,False,8,0,...,Bolsover,,,[The number 1 bus takes you to Mansfield bus s...,True,Public services.Transport & Transit,Public services,Transport & Transit,,
384,3433161903491131,2026-02-08T06:38:24+00:00,https://www.facebook.com/groups/25685899332816...,[],,Post,Pot holes getting worse outside Heron again......,False,3,0,...,Bolsover,,,"[Worst in the whole village, is there any pari...",True,Public services.Roads & Infrastructure,Public services,Roads & Infrastructure,,
402,3432141736926481,2026-02-07T09:44:27+00:00,https://www.facebook.com/groups/25685899332816...,[],,Post,rubbish removal\nrubbish removal\nhello can an...,False,22,0,...,Bolsover,,,"[Drop us a message Ram it in waste services , ...",True,Public services.Waste & Sanitation,Public services,Waste & Sanitation,,
