In [1]:
import pandas as pd
import requests as rq
import numpy as np
import pickle
import json
import re
from bs4 import BeautifulSoup

In [None]:
# https://api.kfc.com/api/store/v2/store.get_restaurants?kfcCityId=7575f6fe-1cc8-4e95-a63e-cb7008868fde - restaurants for a given city
# https://api.kfc.com/api/store/v2/store.get_cities - list of all cities
# https://api.kfc.com/api/menu/api/v1/menu.short/74013271/website/finger_lickin_good - menu request (per-city, according to the original note)

In [10]:
class KfcParser(object):
    """Small client for the KFC web API: cities, restaurants and menus.

    The instance keeps no state; every public call performs a fresh HTTP
    request and returns plain dicts / pandas DataFrames.
    """

    # Seconds to wait for the API before giving up. `requests` applies no
    # timeout by default, so without this a stalled connection hangs forever.
    REQUEST_TIMEOUT = 30

    GET_CITIES_URL = "https://api.kfc.com/api/store/v2/store.get_cities"
    GET_RESTAURANTS_URL = "https://api.kfc.com/api/store/v2/store.get_restaurants?kfcCityId={kfcCityId}"
    GET_MENU_URL = "https://api.kfc.com/api/menu/api/v1/menu.short/{restaurant_id}/website/finger_lickin_good"

    # Output column -> chain of keys to follow inside one `searchResults` entry.
    RESTAURANT_FIELDS = {
        'storeId': ('storePublic', 'storeId'),
        'title': ('storePublic', 'title', 'ru'),
        'streetAddress': ('storePublic', 'contacts', 'streetAddress', 'ru'),
        'city': ('storePublic', 'contacts', 'city', 'ru'),
        'coords': ('storePublic', 'contacts', 'coordinates', 'geometry', 'coordinates'),
        'metro': ('storePublic', 'contacts', 'navigationLandmarks', 'landmarkName', 'ru'),
        'storeManager': ('storePublic', 'contacts', 'storeManager', 'ru'),
        'phoneNumber': ('storePublic', 'contacts', 'phoneNumber'),
        'startTimeLocal': ('storePublic', 'openingHours', 'regular', 'startTimeLocal'),
        'endTimeLocal': ('storePublic', 'openingHours', 'regular', 'endTimeLocal'),
        'features': ('storePublic', 'features'),
        'BreakfastStart': ('storePublic', 'menues', 'availability', 'regular', 'startTimeLocal'),
        'BreakfastEnd': ('storePublic', 'menues', 'availability', 'regular', 'endTimeLocal'),
    }

    # Columns of the menu frame, in output order.
    MENU_COLUMNS = (
        'title',
        'id',              # item id (key of the item in the JSON payload)
        'siteId',          # item id on the website
        'type',            # item type
        'price',
        'delivery',        # whether the item is available for delivery
        'inStore',         # whether the item is available in the store
        'volume',
        'fats',
        'proteins',
        'carbs',
        'modifierGroups',  # what a combo / set consists of
    )

    # Menu fields whose payload is a mapping from which the last value is taken.
    NUTRITION_FIELDS = ('volume', 'fats', 'proteins', 'carbs')

    def __init__(self):
        pass

    def _fetch_json(self, url, method="get"):
        """Request `url` and return the decoded JSON body.

        Raises the `requests` HTTP error for 4xx/5xx responses instead of
        trying to parse an error page as data.
        """
        response = rq.request(method, url, timeout=self.REQUEST_TIMEOUT)
        response.raise_for_status()
        return response.json()

    @staticmethod
    def _navigate_json(obj, chain):
        """Follow `chain` of keys through nested dicts; return None if the path breaks.

        A list met before the end of the chain is replaced by its first
        element. A list reached at the final key is returned as is.
        """
        last = len(chain) - 1
        for pos, key in enumerate(chain):
            # Covers missing keys as well as JSON nulls / scalars in the middle of a path.
            if not isinstance(obj, dict) or key not in obj:
                return None
            obj = obj[key]
            # Compare by position, not by key name: a chain may repeat a key
            # (e.g. 'coordinates' appears twice in the coords chain).
            if isinstance(obj, list) and pos != last:
                if not obj:
                    return None
                obj = obj[0]
        return obj

    @staticmethod
    def _last_value(raw):
        """Return the last value of a mapping without mutating it.

        Empty mappings and None become NaN; non-mapping values are passed through.
        """
        if raw is None:
            return np.nan
        if isinstance(raw, dict):
            if not raw:
                return np.nan
            return list(raw.values())[-1]
        return raw

    @property
    def cities(self):
        """Return {Russian city title: kfcCityId} for cities that have a default store.

        Every access performs a network request.
        """
        # NOTE(review): the original code issues a POST to this endpoint; kept
        # as is - confirm whether the API also accepts GET.
        payload = self._fetch_json(self.GET_CITIES_URL, method="post")
        return {
            city['title']['ru']: city['kfcCityId']
            for city in payload['value']['cities']
            if city.get('defaultStore') is not None
        }

    def extract_restaurants(self, kfcCityId):
        """Return a DataFrame with one row per restaurant of the given city.

        Columns are the keys of `RESTAURANT_FIELDS`; a value that cannot be
        found in the payload is stored as None.
        """
        payload = self._fetch_json(self.GET_RESTAURANTS_URL.format(kfcCityId=kfcCityId))
        restaurants_data = {column: [] for column in self.RESTAURANT_FIELDS}
        for rest in payload['searchResults']:
            for column, chain in self.RESTAURANT_FIELDS.items():
                restaurants_data[column].append(self._navigate_json(rest, chain))
        return pd.DataFrame(restaurants_data)

    def extract_menu(self, restaurant_id):
        """Return a DataFrame with one row per menu item of the given restaurant.

        Columns are `MENU_COLUMNS`; fields absent from an item are NaN.
        """
        payload = self._fetch_json(self.GET_MENU_URL.format(restaurant_id=restaurant_id))
        products = payload['value']['products']

        collected_data_dict = {column: [] for column in self.MENU_COLUMNS}
        for dish_id, dish in products.items():
            for feat in self.MENU_COLUMNS:
                if feat == 'id':
                    # The id is the key of the item in `products`, not a field of the item.
                    value = dish_id
                elif feat not in dish:
                    value = np.nan
                elif feat in self.NUTRITION_FIELDS:
                    value = self._last_value(dish[feat])
                elif feat == 'modifierGroups':
                    # One 'option_N' entry per modifier group, listing the titles on offer.
                    value = {
                        f'option_{i}': [option['title'] for option in subdish['modifiers']]
                        for i, subdish in enumerate(dish[feat] or [], start=1)
                    }
                else:
                    value = dish[feat]
                collected_data_dict[feat].append(value)

        return pd.DataFrame(collected_data_dict)

In [9]:
# Smoke test: build a parser and call both per-entity endpoints once.
a = KfcParser()
# The returned restaurants frame is not displayed: a cell only shows its last expression.
a.extract_restaurants('14c88db8-c975-4c1f-b18d-a430f87cc9b9')
# Last expression of the cell, so this menu frame is the cell's output.
a.extract_menu(74021806)

Unnamed: 0,title,id,siteId,type,price,delivery,inStore,volume,fats,proteins,carbs,modifierGroups
0,Шефбургер Джуниор Острый,2,2963,Dish,9900,True,True,161.0,10.1,13.3,22.0,"{'option_1': ['Ломтик сыра', 'Ломтик Бекона', ..."
1,Соус Кисло-Сладкий Чили,13,9665,Dish,3000,True,True,,,,,{}
2,"Чай Липтон Зеленый 0,4 л",32,3032,Dish,6900,,True,400.0,0.0,0.0,6.7,{}
3,9 Стрипсов Острых,33,1258,Dish,27400,True,True,279.0,12.8,23.1,8.1,{}
4,Пати Бокс,42,4675,Combo,10900,True,True,,,,,"{'option_1': ['Байтсы острые'], 'option_2': ['..."
...,...,...,...,...,...,...,...,...,...,...,...,...
154,Купон 1141,45356,1141,Combo,36900,,True,,,,,"{'option_1': ['Соус Чесночный', 'Соус Терияки'..."
155,Купон 4516,45408,4516,Combo,29900,,True,,,,,"{'option_1': ['Соус Сырный Оригинальный', 'Кет..."
156,Купон 1142,45496,1142,Combo,38900,,True,,,,,"{'option_1': ['Твистер Оригинальный', 'Твистер..."
157,Купон 1144,45614,1144,Combo,39400,,True,,,,,"{'option_1': ['Pepsi 0,8 л', 'Mirinda 0,8 л', ..."


In [None]:
def collect_data(parser=None):
    """Download the menu of every restaurant in every city into one DataFrame.

    Parameters
    ----------
    parser : object, optional
        Object exposing `cities`, `extract_restaurants(kfcCityId)` and
        `extract_menu(restaurant_id)`. A fresh `KfcParser` is created when
        omitted, so the function does not depend on a notebook-level variable.

    Returns
    -------
    pandas.DataFrame
        All menu frames stacked row-wise (each keeps its own index), with
        these columns added or replaced:
        `city`   - title of the city the restaurant belongs to;
        `dishId` - the item id that `extract_menu` returned in `id`;
        `id`     - the restaurant id (overwrites the item id, as before).
        An empty frame is returned when nothing was collected.
    """
    if parser is None:
        parser = KfcParser()

    frames = []
    for city, kfcCityId in parser.cities.items():
        # Restaurants whose storeId could not be extracted cannot be queried.
        restaurants_ids = parser.extract_restaurants(kfcCityId)['storeId'].dropna()
        for restaurant_id in restaurants_ids:
            df = parser.extract_menu(restaurant_id)
            df['city'] = city
            # Keep the item id before `id` is reused for the restaurant id below.
            df['dishId'] = df['id']
            df['id'] = restaurant_id
            frames.append(df)

    # pd.concat raises on an empty list; the original returned an empty frame here.
    if not frames:
        return pd.DataFrame()
    # Concatenate once instead of re-copying the accumulated frame on every iteration.
    return pd.concat(frames)

In [None]:
# Run the full crawl: collect_data requests the restaurants of every city and the menu of every restaurant.
data = collect_data()

In [None]:
# Persist the collected frame to 'data.pickle' (path is relative to the working directory).
with open('data.pickle', 'wb') as file:
    pickle.dump(data, file)