<a href="https://colab.research.google.com/github/componavt/sns4human/blob/main/src/vk/vk_group_all_posts.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Getting all posts from a VK group by a given domain name.

Получение всех постов из группы ВК по заданному доменному имени.

In [21]:
import pandas as pd
import requests
import csv  # for quoting
from config import TOKEN, VERSION

In [22]:
def get_all_posts(token, version, domain):
    offset = 0
    all_posts = []

    while True:
        response = requests.get(
            'https://api.vk.com/method/wall.get',
            params={
                'access_token': token,
                'v': version,
                'domain': domain,
                'offset': offset,
                'count': 100
            }
        )
        data = response.json()

        if 'response' in data:
            posts = data['response']['items']
            all_posts.extend(posts)
            if len(posts) < 100:
                return all_posts
            offset += 100
        elif 'error' in data:
            print(f"Error: {data['error']['error_msg']}")
            return []
        else:
            return []

In [23]:
def create_all_posts_csv(token, version, domain):
    posts = get_all_posts(token, version, domain)
    if not posts:
        print("No posts retrieved.")
        return

    df = pd.DataFrame(posts)

    # Process possible empty values in likes, reposts, views
    df['likes'] = df['likes'].apply(lambda x: int(x['count']) if isinstance(x, dict) else 0)
    df['reposts'] = df['reposts'].apply(lambda x: int(x['count']) if isinstance(x, dict) else 0)
    df['views'] = df['views'].apply(lambda x: int(x['count']) if isinstance(x, dict) else 0)

    df = df[['id', 'text', 'date', 'likes', 'reposts', 'views']].copy()

    # Process date field
    df.loc[:, 'date'] = pd.to_datetime(df['date'], unit='s')
    df.loc[:, 'date'] = df['date'].dt.tz_localize('UTC').dt.tz_convert('Europe/Moscow')
    df.loc[:, 'date'] = df['date'].astype(str)  # Явно преобразуем в строки

    # Remove newline characters in text
    df.loc[:, 'text'] = df['text'].str.replace('\n', ' ', regex=False)

    # Save CSV with quotes only for the 'text' column
    name = domain + ".csv"
    df.to_csv(name, index=False, quoting=csv.QUOTE_MINIMAL, quotechar='"',
              columns=['id', 'text', 'date', 'likes', 'reposts', 'views'],
              escapechar='\\')

In [24]:
domain = 'siogroups'

In [25]:
create_all_posts_csv(TOKEN, VERSION, domain)

 '2025-03-10 10:45:16' '2025-03-09 18:30:07' '2025-03-07 14:59:00'
 '2025-03-06 14:55:00' '2025-03-06 10:07:28' '2025-03-01 18:54:23'
 '2025-02-28 20:18:04' '2025-02-28 18:11:36' '2025-02-28 16:19:23'
 '2025-02-27 14:31:46' '2025-02-26 16:32:51' '2025-02-25 11:18:43'
 '2025-02-21 17:50:00' '2025-02-20 13:59:05' '2025-02-17 12:23:04'
 '2025-02-15 11:37:44' '2025-02-14 18:55:00' '2025-02-14 17:10:07'
 '2025-02-14 16:57:56' '2025-02-12 13:26:45' '2025-02-10 09:46:43'
 '2025-02-07 18:20:07' '2025-02-06 14:15:00' '2025-02-03 13:35:16'
 '2025-02-03 11:59:46' '2025-02-01 11:46:38' '2025-01-31 17:42:54'
 '2025-01-31 17:06:03' '2025-01-29 11:08:32' '2025-01-28 11:35:39'
 '2025-01-27 10:28:50' '2025-01-22 13:21:46' '2025-01-21 15:42:48'
 '2025-01-19 17:13:03' '2025-01-16 15:37:08' '2025-01-15 10:21:00'
 '2025-01-14 14:35:00' '2025-01-14 13:18:36' '2025-01-13 17:57:00'
 '2025-01-13 11:41:08' '2025-01-13 10:35:28' '2025-01-07 18:41:14'
 '2025-01-06 11:22:25' '2025-01-06 09:18:51' '2024-12-29 12:10