In [1]:
import itertools
import json
import os
import pathlib
from typing import NamedTuple, Optional

import envparse
import pendulum
import vk_api

In [2]:
class VkAuth:
    """VK account credentials plus a lazily created, cached API session.

    Attributes:
        phone: Login passed to ``vk_api.VkApi`` as its first argument.
        password: Password passed to ``vk_api.VkApi`` on a fresh login.
        session: The cached ``vk_api.VkApi`` object, or ``None`` until
            :meth:`get_session` has authenticated successfully.
    """

    # The vk_api annotations are quoted so they are not evaluated when the
    # class body runs.
    phone: str
    password: str
    session: "Optional[vk_api.VkApi]"

    def __init__(self, phone: str, password: str,
                 session: "Optional[vk_api.VkApi]" = None):
        """Store the credentials and an optional already-built session."""
        self.phone = phone
        self.password = password
        self.session = session

    @classmethod
    def from_env(cls) -> "VkAuth":
        """Build an instance from the ``VK_PHONE`` / ``VK_PASSWORD`` settings.

        The values are read through ``envparse`` after ``read_envfile()`` has
        been called; the session is left unset.
        """
        env = envparse.Env()
        env.read_envfile()

        return cls(
            phone=env.str('VK_PHONE'),
            password=env.str('VK_PASSWORD'),
            session=None
        )

    def get_session(self) -> "vk_api.VkApi":
        """Return the cached session, creating and authenticating it if needed.

        When ``./vk_config.v2.json`` exists in the working directory the
        session is built from the login alone (presumably so vk_api reuses
        what it saved there -- confirm); otherwise the password and an
        interactive code prompt are supplied.

        The session is cached only after ``auth()`` returns. If ``auth()``
        raises, the exception propagates and the next call retries instead of
        handing back an unauthenticated session.
        """
        if self.session is None:
            saved_config = pathlib.Path('./vk_config.v2.json')
            if saved_config.exists():
                session = vk_api.VkApi(self.phone)
            else:
                def auth_handler():
                    key = input('Enter authentication code')
                    # Second element is passed through to vk_api; its examples
                    # name it "remember_device" -- confirm against the docs.
                    return key, True

                session = vk_api.VkApi(
                    self.phone,
                    self.password,
                    auth_handler=auth_handler)
            session.auth()
            # Cache only a session whose auth() call completed.
            self.session = session
        return self.session

In [3]:
# Build the credentials object from the VK_PHONE / VK_PASSWORD settings.
auth = VkAuth.from_env()

In [4]:
# Create (or reuse) the authenticated session; may prompt for a code on a fresh login.
session = auth.get_session()
# `vk` is the API method accessor used by the cells below (vk.groups..., vk.wall...).
vk = session.get_api()

print(vk)

<vk_api.vk_api.VkApiMethod object at 0x7f3960170ca0>


### Same, but with a service token

In [None]:
def get_auth(token=None, app_id=7322078):
    """Build a ``vk_api.VkApi`` session configured with an app service token.

    The token is no longer embedded in the notebook. Any token that was
    previously committed here should be treated as leaked and revoked.

    Args:
        token: Service token. When omitted, it is read from the
            ``VK_SERVICE_TOKEN`` environment variable.
        app_id: VK application id passed to ``vk_api.VkApi``.

    Returns:
        The ``vk_api.VkApi`` object built from the login of the module-level
        ``auth`` object, the token and the app id.

    Raises:
        RuntimeError: If no token was passed and ``VK_SERVICE_TOKEN`` is
            unset or empty.
    """
    if token is None:
        token = os.environ.get('VK_SERVICE_TOKEN')
    if not token:
        raise RuntimeError(
            'No service token: pass token=... or set VK_SERVICE_TOKEN'
        )

    session = vk_api.VkApi(
        login=auth.phone,
        token=token,
        app_id=app_id
    )
    return session

# TODO: still unresolved how to use the service token with this session.
# NOTE(review): vk_api's token-based examples appear to call get_api() directly
# without auth() -- confirm against the vk_api documentation.

In [None]:
# Calls auth() on whatever `session` currently refers to.
# NOTE(review): no visible cell assigns the result of get_auth() to `session`, so this
# presumably re-authenticates the login/password session -- confirm the intent.
session.auth()

In [None]:
# VK group ids to analyze; the trailing comments are the group titles (translated from Russian).
group_ids = [
    19732513, # "The truth about vaccinations"
    457918, # I AM AGAINST VACCINATING CHILDREN
    9093914, # Moms and dads against vaccinations!!
    7362539, # The whole truth about vaccinations
    59728906, # The whole truth about vaccinations!!!
    182362555, # IMMUNE RESPONSE - June 2 - All-Russian campaign
    38532412, # Vaccinations: "for" and "against"
]

In [None]:
from tqdm import tqdm_notebook as tqdm

In [None]:
import pandas as pd

### Fetch members

In [None]:
# Pick the single group whose members are fetched and exported below.
# Switch the index (and re-run the following cells) to process another group.
# group_id_to_analyze = group_ids[1]
group_id_to_analyze = group_ids[0]

In [None]:
# Page through groups.getMembers until the reported member count is reached.
member_fields = "sex,bdate,city,country,education"

members = vk.groups.getMembers(
    group_id=group_id_to_analyze,
    fields=member_fields,
)
total = members['count']
total_fetched = len(members['items'])

# Pages are stored under the running total at the moment they were fetched;
# later cells read this dict through .keys() and .values().
member_list = {total_fetched: members['items']}
offset = total_fetched

while total_fetched < total:
    members = vk.groups.getMembers(
        group_id=group_id_to_analyze,
        fields=member_fields,
        offset=offset,
    )
    page = members['items']
    if not page:
        # An empty page would never advance total_fetched (endless loop) and
        # would overwrite the previous page stored under the same key.
        break

    total_fetched += len(page)
    offset += len(page)
    member_list[total_fetched] = page

In [None]:
# Show the running totals under which each fetched page was stored.
member_list.keys()

In [None]:
# Turn every fetched page into its own frame, then stack them into one members table.
dfs = list(map(pd.DataFrame, member_list.values()))
df = pd.concat(dfs)
df.head()

In [None]:
# Count the members that have no value in the `university` column.
# NOTE(review): `university` is not requested by name in `fields`; presumably it is
# returned as part of `education` -- confirm.
df['university'].isna().sum()

In [None]:
# Year, abbreviated month name and day of today's date, lowercased; used as a filename suffix.
datestamp = pendulum.today().strftime("%Y%b%d").lower()

# Written relative to the working directory; the "Fetch friends" cell globs members_*.csv.
df.to_csv(f'members_{group_id_to_analyze}_{datestamp}.csv')

In [None]:
# Display the id of the group that was just processed.
group_id_to_analyze

### Fetch friends

In [5]:
# Load every members_*.csv found in the working directory into one frame.
# pathlib replaces the `!ls` shell capture: it does not depend on a shell or on
# IPython, and an empty match is reported explicitly.
fnames = sorted(str(path) for path in pathlib.Path('.').glob('members_*.csv'))
if not fnames:
    raise FileNotFoundError('No members_*.csv files found in the working directory')

df = pd.concat(
    [pd.read_csv(fname, index_col=0) for fname in fnames]
)
print('Total users', len(df))
# A user can belong to several groups; keep the first row seen for each id.
df = df.drop_duplicates(subset='id')
print('Total uniq users', len(df.id.unique()))

Total users 80923
Total uniq users 70966


In [47]:
# Peek at the first 25 user ids of the de-duplicated frame.
df.id.values[:25]

array([  2699,   3696,   9602,  43808,  51342,  68107,  69285,  79825,
       103821, 114482, 116598, 124850, 133089, 137038, 138818, 140915,
       140956, 147188, 150032, 152212, 152962, 162198, 165712, 167242,
       170946])

In [14]:
from collections import defaultdict

In [22]:
from tqdm import tqdm

In [25]:
def by_chunk(iterable, chunk_size=1000):
    """
    Lazily split an iterable into lists of at most ``chunk_size`` items.

    Basically, [.........] -> [[...], [...], [...]]

    for chunk in by_chunk(big_iterable):
        for entry in chunk:
            process(entry)

    Args:
        iterable: Any iterable; it is consumed exactly once, so generators
            are fine.
        chunk_size: Maximum number of items per yielded list.

    Yields:
        Non-empty lists in input order; only the last one may be shorter
        than ``chunk_size``. An empty input yields nothing.
    """
    iterator = iter(iterable)
    while True:
        chunk = list(itertools.islice(iterator, chunk_size))
        if not chunk:
            return
        yield chunk

In [4]:
from vkparsing.vk_auth_helper import fetch_user_friends_for_chunk
from vkparsing.parse_user_friends import get_user_ids

In [6]:
# Collect user ids through the project helper, given the members CSV glob pattern.
# NOTE(review): presumably returns a sliceable sequence (it is sliced in the next cell) -- confirm.
user_ids = get_user_ids('members_*.csv')

In [8]:
# Run the project helper on the slice user_ids[5:20].
# NOTE(review): the meaning of `part=2020` is defined by the helper -- TODO confirm.
fetch_user_friends_for_chunk(user_ids[5:20], part=2020)

In [51]:
# For a small sample of users, collect the groups/publics each one belongs to.
# Result: user_id -> {running_total: [items of that page]}, the same paging
# layout the members cell uses; the next cell reads .keys() / .values().
user_id_to_groups = defaultdict(list)
private_groups = 0
users_ids = df.id.values[:5]
group_filter = ['groups', 'publics']

for user_id in users_ids:
    try:
        group_list = vk.groups.get(
            user_id=user_id,
            filter=group_filter
        )
    except vk_api.ApiError:
        # Every API error on the first request is counted as a private group list.
        private_groups += 1
        continue

    total = group_list['count']
    total_fetched = len(group_list['items'])

    groups = {total_fetched: group_list['items']}
    offset = total_fetched

    while total_fetched < total:
        # Same filter as the first request, so all pages describe the same set.
        more_groups = vk.groups.get(
            user_id=user_id,
            filter=group_filter,
            offset=offset,
        )
        page = more_groups['items']
        if not page:
            # Nothing more is being returned; stop instead of looping forever.
            break

        total_fetched += len(page)
        offset += len(page)
        groups[total_fetched] = page
    user_id_to_groups[user_id] = groups
# ----
# print(' '.join([
#     f'Total private grouplists {private_groups},'
#     f'out of {df.id.count()} total users'
# ]))


  0%|          | 0/25 [00:00<?, ?it/s][A
  8%|▊         | 2/25 [00:00<00:05,  4.21it/s][A
 12%|█▏        | 3/25 [00:01<00:07,  2.80it/s][A
 16%|█▌        | 4/25 [00:01<00:07,  2.67it/s][A
 20%|██        | 5/25 [00:01<00:07,  2.58it/s][A
 24%|██▍       | 6/25 [00:02<00:07,  2.56it/s][A
 28%|██▊       | 7/25 [00:02<00:07,  2.51it/s][A
 32%|███▏      | 8/25 [00:03<00:07,  2.33it/s][A
 36%|███▌      | 9/25 [00:03<00:06,  2.37it/s][A
 40%|████      | 10/25 [00:04<00:06,  2.41it/s][A
 44%|████▍     | 11/25 [00:04<00:05,  2.46it/s][A
 48%|████▊     | 12/25 [00:04<00:05,  2.48it/s][A
 52%|█████▏    | 13/25 [00:05<00:04,  2.49it/s][A
 56%|█████▌    | 14/25 [00:05<00:04,  2.47it/s][A
 60%|██████    | 15/25 [00:06<00:04,  2.40it/s][A
 64%|██████▍   | 16/25 [00:06<00:03,  2.42it/s][A
 68%|██████▊   | 17/25 [00:06<00:03,  2.43it/s][A
 72%|███████▏  | 18/25 [00:07<00:02,  2.44it/s][A
 76%|███████▌  | 19/25 [00:07<00:02,  2.43it/s][A
 80%|████████  | 20/25 [00:08<00:02,  2.42it/s]

In [45]:
# One line per user: the user id, the page keys, and the size of the first stored page.
for uid, pages in user_id_to_groups.items():
    first_page = list(pages.values())[0]
    print(uid, pages.keys(), len(first_page))


637191 dict_keys([1031]) 1031


In [None]:
response = vk.wall.get(count=1)  # Use the wall.get method

if response['items']:
    print(response['items'][0])

# Computed once so every file written by this run carries the same date suffix.
datestamp = pendulum.today().strftime('%Y%b%d').lower()

for group_id in group_ids:
    group_info = vk.groups.getById(group_id=group_id)
    # Single request without an offset: only what that one call returns is saved.
    members = vk.groups.getMembers(group_id=group_id)

    fname = f'members_{group_id}_on_{datestamp}.json'
    with open(fname, 'w') as fp:
        json.dump(
            {
                'group_info': group_info,
                'members': members,
            },
            fp
        )
    # `members` is the response dict; the members themselves are in its 'items' list,
    # so len(members) would report the number of dict keys instead.
    member_count = len(members['items'])
    print(
        f'saved {member_count} members and group info in as',
        fname
    )

In [None]:
# Rebuild the credentials object from the environment and pass it to main().
# NOTE(review): `main` is not defined in any cell visible here -- confirm where it
# comes from; on a fresh kernel this cell would otherwise raise NameError.
auth = VkAuth.from_env()
main(auth)