In [40]:
import requests
import json
import pandas as pd
import psycopg2

#### You need to research data from the iTunes API on the top paid applications in Russia for the following tasks:
- define the data model by splitting the data into main data (some kind of daily rating for a specific market study) and guides (reference tables that hold additional attributes of the main data), including the relationship information needed to store them in a DB
- present which tool you would choose for the scheduled ETL process that loads the data into the DB



In [41]:
# Apple Marketing Tools RSS endpoint: top-paid apps feed, "ru" storefront, 100 entries.
url = 'https://rss.applemarketingtools.com/api/v2/ru/apps/top-paid/100/apps.json'

# requests has no default timeout, so a stalled connection would block the
# notebook (or a scheduled run) forever; 30 seconds is ample for one small JSON document.
response = requests.get(url, timeout=30)

# Stop here with a clear HTTP error instead of failing later with a confusing
# JSON-decoding or KeyError problem when the service answers with 4xx/5xx.
response.raise_for_status()

# Parsed feed payload; the following cells read everything from json_data['feed'].
json_data = response.json()

In [42]:
# Feed-level metadata (one value per field) collected into a single-row DataFrame.
feed_meta = json_data['feed']

# Fields that are nested inside sub-objects of the feed instead of sitting
# directly under json_data['feed'].
nested_feed_fields = {
    'author_name': feed_meta['author']['name'],
    'author_url': feed_meta['author']['url'],
    'self_link': feed_meta['links'][0]['self'],
}

# Column order of the resulting frame.
feed_columns = [
    'title', 'id', 'author_name', 'author_url', 'self_link',
    'country', 'icon', 'updated', 'copyright',
]

# Each column maps to a one-element list so pandas builds exactly one row.
keys_to_extract = {
    column: [nested_feed_fields[column] if column in nested_feed_fields else feed_meta[column]]
    for column in feed_columns
}

app_main_info = pd.DataFrame(keys_to_extract)
app_main_info


Unnamed: 0,title,id,author_name,author_url,self_link,country,icon,updated,copyright
0,Топ платных приложений,https://rss.applemarketingtools.com/api/v2/ru/...,Apple,https://www.apple.com/,https://rss.applemarketingtools.com/api/v2/ru/...,ru,https://www.apple.com/favicon.ico,"Mon, 1 May 2023 16:27:41 +0000",Copyright © 2023 Apple Inc. All rights reserved.


In [43]:
# Per-app records from the feed, flattened to one row per (app, genre) pair.

# Explicit column list: fixes the column order and guarantees the 'genres'
# column exists even when the feed returns no results, so explode() below
# does not raise KeyError on an empty frame.
app_columns = [
    'artistName', 'id', 'name', 'releaseDate',
    'kind', 'artworkUrl100', 'genres', 'url',
]

apps = []
for result in json_data['feed']['results']:
    app = {
        'artistName': result['artistName'],
        'id': result['id'],
        'name': result['name'],
        'releaseDate': result['releaseDate'],
        'kind': result['kind'],
        'artworkUrl100': result['artworkUrl100'],
        # Keep only the genre names; an app may have zero, one or several genres.
        'genres': [genre['name'] for genre in result['genres']],
        'url': result['url'],
    }
    apps.append(app)

df_results = (
    pd.DataFrame(apps, columns=app_columns)
    # Only 'genres' can hold a list, so map that single column instead of
    # running the deprecated DataFrame.applymap over every cell.
    # An empty genre list becomes None, i.e. "no genre".
    .assign(genres=lambda frame: frame['genres'].map(lambda names: names or None))
    # One row per genre; apps with several genres repeat with the same index label.
    .explode('genres')
)
df_results

Unnamed: 0,artistName,id,name,releaseDate,kind,artworkUrl100,genres,url
0,Dmitry Filinsky,856861890,Антирадар HUD Speed Pro,2014-04-17,apps,https://is5-ssl.mzstatic.com/image/thumb/Purpl...,,https://apps.apple.com/ru/app/%D0%B0%D0%BD%D1%...
1,Oleg Lavrynenko,976648744,Моя статистика для ВКонтакте,2015-06-03,apps,https://is4-ssl.mzstatic.com/image/thumb/Purpl...,,https://apps.apple.com/ru/app/%D0%BC%D0%BE%D1%...
2,Threema GmbH,578665578,Threema. Безопасный мессенджер,2012-12-28,apps,https://is1-ssl.mzstatic.com/image/thumb/Purpl...,,https://apps.apple.com/ru/app/threema-%D0%B1%D...
3,Shadow Launch Technology Limited,932747118,Shadowrocket,2015-04-14,apps,https://is2-ssl.mzstatic.com/image/thumb/Purpl...,,https://apps.apple.com/ru/app/shadowrocket/id9...
4,Vitaly Shmuradko,606147643,Антирадар Стрелка,2013-04-20,apps,https://is1-ssl.mzstatic.com/image/thumb/Purpl...,,https://apps.apple.com/ru/app/%D0%B0%D0%BD%D1%...
...,...,...,...,...,...,...,...,...
96,wonderkind GmbH,1162675136,Тигренок: Пожарные,2016-12-22,apps,https://is2-ssl.mzstatic.com/image/thumb/Purpl...,Образование,https://apps.apple.com/ru/app/%D1%82%D0%B8%D0%...
97,Piksoft Inc.,342548956,Турбоскан: Быстрый сканер,2009-12-07,apps,https://is1-ssl.mzstatic.com/image/thumb/Purpl...,Бизнес,https://apps.apple.com/ru/app/%D1%82%D1%83%D1%...
97,Piksoft Inc.,342548956,Турбоскан: Быстрый сканер,2009-12-07,apps,https://is1-ssl.mzstatic.com/image/thumb/Purpl...,Производительность,https://apps.apple.com/ru/app/%D1%82%D1%83%D1%...
98,Axidep LLC,908563635,Полиглот - Английские артикли,2014-08-21,apps,https://is2-ssl.mzstatic.com/image/thumb/Purpl...,Образование,https://apps.apple.com/ru/app/%D0%BF%D0%BE%D0%...


Данные из первого df (app_main_info) могут служить основными, а данные из второго df (df_results) — дополнительными.
Можно создать таблицу для данных из первого df и добавить в неё столбец с уникальными id из второго df.
Данные из второго df необходимо распределить по разным справочникам:
1) Справочник авторов: app_author_id, app_author_name, app_id 
2) Справочник приложений: app_id, app_name, app_release_date, app_kind, app_pic, app_url, author_id 
3) Справочник жанров: app_genre_id, app_genre_name
4) Таблица, которая свяжет предыдущие 3 таблицы: app_result_id, app_id, author_id, genre_id

В качестве инструмента для запуска ETL-процесса по расписанию можно использовать Apache Airflow.