<a href="https://colab.research.google.com/github/componavt/sns4human/blob/main/src/vk/vk_group_all_posts.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Getting all posts from a VK group by a given domain name.

Получение всех постов из группы ВК по заданному доменному имени.

In [None]:
# VK domains (short names of groups / pages) whose walls will be exported.
# Exactly one assignment is active; the commented-out lists are alternative
# sets kept for convenience - uncomment one of them to switch.
# domains = ['stupeniorthodox', 'club103835710', 'club151359929', 'club221681617']
# domains = ['rk_nationalmuseum','olonmus','museum_ptz', 'vepsmuseum', 'club226126304', 'etnodomrk']# museum_tokens.csv
# domains = ['echo_association', 'domderevnivoknavolok', 'public187190729', 'elamakeskus', 'vienakostamus',
#           'vienasaatio', 'kinnermaki', 'public41933338', 'id154043584', 'club67297564',
#           'haikola_kyla', 'club65008088']                                         # ethnocultural_tokens.csv
domains = [
    'karjalankielenkodi',
    'mediacenter_periodika',
    'karel_colorit',
    'karjalanrahvahanliitto',
    'club2562309',
    'club_dk_padany',
    'melnikpryazha',
]  # multifunctional_tokens.csv

In [None]:
import csv  # for quoting
import time

import pandas as pd
import requests

from config import TOKEN, VERSION

In [None]:
def get_all_posts(token, version, domain, count=100, timeout=30,
                  max_retries=3, retry_delay=1.0):
    """Download every post from a VK wall by paging through ``wall.get``.

    Pages of ``count`` posts are requested with an increasing ``offset``
    until a page shorter than ``count`` comes back, which marks the end of
    the wall.

    Args:
        token: VK API access token, sent as ``access_token``.
        version: VK API version string, sent as ``v``.
        domain: Short name of the group/page whose wall is read.
        count: Number of posts requested per call (defaults to 100, the
            value this notebook has always used).
        timeout: Seconds to wait for each HTTP response. Without it a
            stalled connection would block the notebook indefinitely.
        max_retries: How many times in a row a rate-limited page is
            requested again before giving up.
        retry_delay: Pause in seconds before each such retry.

    Returns:
        A list with the raw post dicts taken from ``response.items`` of every
        page. An empty list is returned when the API reports an error that
        is not retried (or retries run out), or when the payload contains
        neither ``response`` nor ``error``.

    Raises:
        ValueError: If ``count`` is smaller than 1 (the loop could never
            detect the last page).
    """
    if count < 1:
        raise ValueError("count must be a positive integer")

    # VK signals "too many requests per second" with error_code 6; that
    # condition is transient, so it is retried instead of discarding the
    # pages that were already downloaded.
    rate_limit_code = 6

    offset = 0
    all_posts = []
    retries_left = max_retries

    while True:
        response = requests.get(
            'https://api.vk.com/method/wall.get',
            params={
                'access_token': token,
                'v': version,
                'domain': domain,
                'offset': offset,
                'count': count
            },
            timeout=timeout,
        )
        data = response.json()

        if 'response' in data:
            posts = data['response']['items']
            all_posts.extend(posts)
            # A short (or empty) page means the wall has been exhausted.
            if len(posts) < count:
                return all_posts
            offset += count
            retries_left = max_retries  # the retry budget is per page
        elif 'error' in data:
            error = data['error']
            if error.get('error_code') == rate_limit_code and retries_left > 0:
                retries_left -= 1
                time.sleep(retry_delay)
                continue  # ask for the same offset again
            print(f"Error: {error['error_msg']}")
            return []
        else:
            # Unknown payload shape: nothing sensible can be extracted.
            return []

In [None]:
def _posts_to_dataframe(posts):
    """Flatten raw wall post dicts into the table that gets exported."""

    def _count_or_zero(value):
        # Counters arrive as nested dicts ({'count': N}); anything else
        # (e.g. NaN for a post without that counter) is treated as 0.
        return int(value['count']) if isinstance(value, dict) else 0

    df = pd.DataFrame(posts)

    # A counter may be missing on single posts (handled per value above) or
    # on every post of the batch, in which case the column does not exist
    # at all and plain df[column] would raise KeyError.
    for column in ('likes', 'reposts', 'views'):
        if column in df.columns:
            df[column] = df[column].apply(_count_or_zero)
        else:
            df[column] = 0

    df = df[['id', 'text', 'date', 'likes', 'reposts', 'views']].copy()

    # 'date' is a Unix timestamp in seconds; render it as Moscow local time.
    df['date'] = (
        pd.to_datetime(df['date'], unit='s', utc=True)
        .dt.tz_convert('Europe/Moscow')
        .dt.strftime('%Y-%m-%d %H:%M:%S')
    )

    # Missing text counts as empty so the filter below drops it; newlines are
    # flattened to spaces so each post stays on one line.
    df['text'] = df['text'].fillna('').str.replace('\n', ' ', regex=False)

    # Drop posts without any text (e.g. bare reposts / shares).
    return df[df['text'].str.strip().ne('')]


def create_all_posts_csv(token, version, domain):
    """Export the text posts of one VK wall to ``<domain>.csv``.

    Posts are fetched with ``get_all_posts``, reduced to the columns
    ``id, text, date, likes, reposts, views`` and written to a CSV file named
    after ``domain`` in the current working directory. Posts whose text is
    empty or whitespace-only are skipped.

    Args:
        token: VK API access token, forwarded to ``get_all_posts``.
        version: VK API version string, forwarded to ``get_all_posts``.
        domain: Short name of the group/page; also the CSV file stem.

    Returns:
        None. A message is printed and no file is written when nothing was
        retrieved or when no post contains text.
    """
    posts = get_all_posts(token, version, domain)
    if not posts:
        print("No posts retrieved.")
        return

    df = _posts_to_dataframe(posts)
    if df.empty:
        print(f"No posts with text content found for {domain}")
        return

    # QUOTE_MINIMAL quotes only the fields that need it (those containing the
    # delimiter, the quote character or a line break), which in practice is
    # the free-form 'text' column.
    name = domain + ".csv"
    df.to_csv(name, index=False, quoting=csv.QUOTE_MINIMAL, quotechar='"',
              columns=['id', 'text', 'date', 'likes', 'reposts', 'views'],
              escapechar='\\')

In [None]:
# Write one CSV file per configured VK domain.
for d in domains:
    create_all_posts_csv(token=TOKEN, version=VERSION, domain=d)