## Анализ ЦА

#### Международный опыт

Парсинг сайта https://www.open2study.com/statistics/coursera-statistics/ с анализом ЦА платформы Coursera

In [21]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
import re

# Функция для получения HTML-страницы
def get_soup(url):
    response = requests.get(url)
    if response.status_code == 200:
        return BeautifulSoup(response.text, 'html.parser')
    else:
        print(f"Ошибка при запросе {url}: {response.status_code}")
        return None

url = "https://www.open2study.com/statistics/coursera-statistics/"

# Получаем страницу и таблицу
soup = get_soup(url)
tables = soup.find_all('table')

In [22]:
len(tables)

14

In [23]:
def get_data(table):
    data = []

    for row in table.find_all('tr'):
        cells = row.find_all(['td', 'th'])
        row_data = [cell.get_text(strip=True) for cell in cells]

        if cells:
            country_link = cells[1].find('a', href=True)
            country_name = cells[1].get_text(strip=True)

        data.append(row_data)

    return data

In [24]:
data = get_data(tables[1])
df = pd.DataFrame(data[1:], columns=data[0])
df['Number Of Coursera Learners'] = df['Number Of Coursera Learners'].str.replace(' million', '').astype(float)
df

Unnamed: 0,Year,Number Of Coursera Learners
0,2023,142.0
1,2022,118.0
2,2021,92.0
3,2020,71.0
4,2019,44.0
5,2018,35.0
6,2017,28.0
7,2016,21.0


In [25]:
import plotly.express as px

fig = px.bar(df[::-1], x='Year', y='Number Of Coursera Learners', title='Рост количества учеников платформы Coursera (млн чел.)')

fig.show()

In [26]:
data2 = get_data(tables[2])
df2 = pd.DataFrame(data2[1:], columns=data2[0])
df2['Share Of Coursera users'] = df2['Share Of Coursera users'].str.replace('%', '').astype(int)
df2

Unnamed: 0,Age,Share Of Coursera users
0,18 to 24 Years,21
1,25 to 34 Years,37
2,35 to 44 Years,22
3,45 to 54 Years,10
4,55 to 64 Years,5
5,Over 65 Years,3


In [27]:
fig = px.bar(df2, x='Age', y='Share Of Coursera users', title='Распределение пользователей Coursera по возрастам')

fig.show()

In [28]:
data3 = get_data(tables[3])
df3 = pd.DataFrame(data3[1:], columns=data3[0])
df3['Number Of Coursera Users'] = df3['Number Of Coursera Users'].str.replace('K', '000').astype(int)
df3

Unnamed: 0,Region,Number Of Coursera Users,Share Of Coursera Users
0,North America,17000,31%
1,Europe,12000,21%
2,Middle East,3000,5%
3,India,7000,13%
4,Asia Pacific,7000,13%
5,Africa,4000,8%
6,Latin America,5000,9%


In [29]:
region_coords = {
    'North America': (45, -100),
    'Europe': (50, 10),
    'Middle East': (25, 45),
    'India': (20, 77),
    'Asia Pacific': (20, 120),
    'Africa': (0, 20),
    'Latin America': (-20, -60)
}

df3['Latitude'] = df3['Region'].map(lambda x: region_coords[x][0])
df3['Longitude'] = df3['Region'].map(lambda x: region_coords[x][1])

fig = px.scatter_geo(df3,
                     lat='Latitude',
                     lon='Longitude',
                     size='Number Of Coursera Users',
                     hover_name='Region',
                     title='Распределение пользователей Coursera по регионам',
                     projection="robinson",
                     color='Number Of Coursera Users',
                     color_continuous_scale=px.colors.sequential.Plasma,
                     size_max=40)

fig.update_layout(
    title_text='Распределение пользователей Coursera по регионам',
    title_x=0.5,
    width=1200,
    height=800,
    geo=dict(
        showland=True,
        landcolor="rgb(243, 243, 243)",
        oceancolor="rgb(160, 220, 255)",
        showocean=True,
        lakecolor="rgb(160, 220, 255)",
    ),
)

fig.show()

In [30]:
data4 = get_data(tables[5])
df4 = pd.DataFrame(data4[1:], columns=data4[0])
df4['Percentage Of Coursera Learners'] = df4['Percentage Of Coursera Learners'].str.replace('%', '').astype(int)
df4

Unnamed: 0,Primary Goal To Enroll In Recent Course/ Program,Percentage Of Coursera Learners
0,Get better at skills to get hired for their fi...,24
1,Wanted to switch to a different role by develo...,29
2,Working on developing their skills to advance ...,32
3,Learn more about topics they are interested in.,15


In [31]:
column_name = 'Primary Goal To Enroll In Recent Course/ Program'
values_name = 'Percentage Of Coursera Learners'

fig = px.pie(df4, values=values_name, names=column_name,
             title=f'Primary Goal To Enroll In Recent Course/ Program',
             hover_data=[values_name],
             labels={column_name: column_name, values_name: values_name})

fig.update_layout(legend_title_text='Categories',
                  legend=dict(
                  x=0.8,
                  y=0.7,
                  xanchor='left',
                  yanchor='middle'
                  ))
fig.show()

### Дашборд

In [17]:
!pip install dash
!pip install pyngrok

Collecting dash
  Downloading dash-3.0.0-py3-none-any.whl.metadata (10 kB)
Collecting Flask<3.1,>=1.0.4 (from dash)
  Downloading flask-3.0.3-py3-none-any.whl.metadata (3.2 kB)
Collecting Werkzeug<3.1 (from dash)
  Downloading werkzeug-3.0.6-py3-none-any.whl.metadata (3.7 kB)
Collecting retrying (from dash)
  Downloading retrying-1.3.4-py3-none-any.whl.metadata (6.9 kB)
Collecting stringcase>=1.2.0 (from dash)
  Downloading stringcase-1.2.0.tar.gz (3.0 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Downloading dash-3.0.0-py3-none-any.whl (8.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.0/8.0 MB[0m [31m40.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading flask-3.0.3-py3-none-any.whl (101 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m101.7/101.7 kB[0m [31m10.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading werkzeug-3.0.6-py3-none-any.whl (227 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m228.0/228.0 kB[0m [3

In [36]:
import dash
from dash import dcc, html
from dash.dependencies import Input, Output
import plotly.express as px
import pandas as pd
import numpy as np
from pyngrok import ngrok

fig1 = px.bar(df[::-1], x='Year', y='Number Of Coursera Learners', title='Рост количества учеников Coursera')
fig2 = px.bar(df2, x='Age', y='Share Of Coursera users', title='Распределение пользователей Coursera по возрастам')
fig3 = px.scatter_geo(df3, lat='Latitude', lon='Longitude', size='Number Of Coursera Users', hover_name='Region',
                      title='Распределение пользователей Coursera по регионам', projection="robinson",
                      color='Number Of Coursera Users', color_continuous_scale=px.colors.sequential.Plasma, size_max=20)
fig4 = px.pie(df4, values=values_name, names=column_name, title='Основная цель обучения на курсах')

fig1.update_layout(width=500, height=400)
fig2.update_layout(width=500, height=400)
fig3.update_layout(width=600, height=400)

fig4.update_layout(height=400, width=600, legend_title_text='Categories',
    legend=dict(font=dict(size=7), x=1, y=1))

app = dash.Dash(__name__)
app.layout = html.Div([
    html.H1("Интерактивный дашборд с анализом целевой аудитории платформы Coursera", style={'text-align': 'center'}),

    html.Div([
        html.Div(dcc.Graph(figure=fig1), style={'padding': '0px', 'margin': '0'}),
        html.Div(dcc.Graph(figure=fig2), style={'padding': '0px', 'margin': '0'}),
        html.Div(dcc.Graph(figure=fig3), style={'padding': '0px', 'margin': '0'}),
        html.Div(dcc.Graph(figure=fig4), style={'padding': '0px', 'margin': '0'})
    ], style={
        'display': 'grid',
        'gridTemplateColumns': 'repeat(2, 1fr)',
        'gap': '5px',
        'padding': '0px',
        'gridTemplateRows': 'repeat(2, minmax(100px, auto))',
        'grid-row-gap': '0px'
    })
])

ngrok.set_auth_token("токен")

ngrok.kill()

public_url = ngrok.connect(8050)
print("Доступно по ссылке:", public_url)

if __name__ == '__main__':
    from google.colab import output
    output.serve_kernel_port_as_window(8050)
    app.run(port=8050, debug=True)

Доступно по ссылке: NgrokTunnel: "https://4200-34-106-60-16.ngrok-free.app" -> "http://localhost:8050"
Try `serve_kernel_port_as_iframe` instead. [0m


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

### Анализ подписчиков сообщества Нетология через ВК API

In [1]:
import requests
import time

# Ваш токен и ID сообщества
ACCESS_TOKEN = 'токен'
GROUP_ID = '30159897'  # ID сообщества
VK_API_VERSION = '5.131'

# Функция для получения всех подписчиков сообщества
def get_all_group_members(group_id, access_token):
    all_members = []
    offset = 0
    count = 1000  # Максимальное количество подписчиков за один запрос

    while True:
        url = 'https://api.vk.com/method/groups.getMembers'
        params = {
            'group_id': group_id,
            'access_token': access_token,
            'v': VK_API_VERSION,
            'count': count,
            'offset': offset
        }
        response = requests.get(url, params=params).json()

        if 'response' in response:
            members = response['response']['items']
            all_members.extend(members)
            if len(members) < count:
                break  # Если подписчиков меньше, чем count, значит, это последняя страница
            offset += count
        else:
            print("Ошибка:", response)
            break

        # Задержка для соблюдения лимитов API
        time.sleep(0.34)  # Около 3 запросов в секунду

    return all_members

In [2]:
member_ids = get_all_group_members(GROUP_ID, ACCESS_TOKEN)
len(member_ids)

106673

In [23]:
!pip install datetime

Collecting datetime
  Downloading DateTime-5.5-py3-none-any.whl.metadata (33 kB)
Collecting zope.interface (from datetime)
  Downloading zope.interface-7.2-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.4/44.4 kB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
Downloading DateTime-5.5-py3-none-any.whl (52 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m52.6/52.6 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading zope.interface-7.2-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (259 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m259.8/259.8 kB[0m [31m8.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: zope.interface, datetime
Successfully installed datetime-5.5 zope.interface-7.2


In [3]:
from datetime import datetime

VK_API_VERSION = '5.131'

def get_user_ages_genders_and_cities(user_ids, access_token):
    url = 'https://api.vk.com/method/users.get'
    all_data = []

    # Разделяем user_ids на части по 500 ID за запрос
    chunk_size = 500
    for i in range(0, len(user_ids), chunk_size):
        chunk = user_ids[i:i + chunk_size]
        params = {
            'user_ids': ','.join(map(str, chunk)),
            'access_token': access_token,
            'v': VK_API_VERSION,
            'fields': 'bdate,sex,city'
        }

        response = requests.get(url, params=params).json()
        for user in response['response']:
            # Извлекаем возраст, пол и город
            bdate = user.get('bdate', None)
            sex = user.get('sex', None)
            city = user.get('city', {}).get('title', None) if 'city' in user else None

            # Вычисляем возраст, если дата рождения указана
            age = None
            if bdate:
                try:
                    birth_date = datetime.strptime(bdate, "%d.%m.%Y")
                    today = datetime.today()
                    age = today.year - birth_date.year
                    if (today.month, today.day) < (birth_date.month, birth_date.day):
                        age -= 1
                except ValueError:
                    pass

            if sex == 1:
                gender = "женский"
            elif sex == 2:
                gender = "мужской"
            else:
                gender = None

            all_data.append({
                'id': user['id'],
                'возраст': age,
                'пол': gender,
                'город': city
            })

    return all_data

In [4]:
all_data = get_user_ages_genders_and_cities(member_ids, ACCESS_TOKEN)

In [5]:
import pandas as pd
df = pd.DataFrame(all_data, columns=["id", "возраст", "пол", "город"])
df

Unnamed: 0,id,возраст,пол,город
0,19,,женский,Saint Petersburg
1,243,37.0,женский,Saint Petersburg
2,251,,мужской,
3,545,38.0,мужской,Saint Petersburg
4,847,,женский,Novosibirsk
...,...,...,...,...
106668,1030059416,50.0,женский,
106669,1031229558,25.0,женский,
106670,1033087051,33.0,женский,Saint Petersburg
106671,1034047294,32.0,женский,Saint Petersburg


In [7]:
import plotly.express as px

df_cleaned_age = df[(df['возраст'].notna()) & (df['возраст'] <= 100)]

fig1 = px.histogram(df_cleaned_age, x='возраст', title='Распределение возраста среди подписчиков сообщества Нетология в ВК')
fig1.update_xaxes(
    title_text='Возраст',
    dtick=5,
    range=[14, 100]
)

fig1.update_yaxes(
    title_text='Количество',
    dtick=500,
    range=[0, 2000]
)
fig1.show()

In [8]:
df_cleaned_gender = df.dropna(subset='пол')

fig2 = px.pie(
    df_cleaned_gender,
    names='пол',
    title='Распределение по полу',
    hole=0.4
)

fig2.update_layout(
    legend=dict(
        title='Пол',
        x=0.65,
        y=0.7
    )
)

fig2.show()

In [10]:
from urllib.parse import quote

# Получаем координаты городов
def get_coordinates(city_name):
    encoded_city_name = quote(city_name)

    url = f"https://nominatim.openstreetmap.org/search?q={encoded_city_name}&format=json"

    headers = {
        'User-Agent': 'MyApp/1.0 (myapp@example.com)'
    }

    response = requests.get(url, headers=headers)

    if response.status_code == 200:
        data = response.json()
        if data:
            return float(data[0]['lat']), float(data[0]['lon'])
        else:
            return None
    else:
        return None

df_cleaned_cities = df.dropna(subset='город')
coordinates_dict = {}

# Уникальные города
unique_cities = df_cleaned_cities['город'].unique()[:500]

for city in unique_cities:
    coordinates = get_coordinates(city)
    if coordinates:
        coordinates_dict[city] = coordinates

df_cleaned_cities['широта'] = df_cleaned_cities['город'].map(lambda x: coordinates_dict.get(x, (None, None))[0])
df_cleaned_cities['долгота'] = df_cleaned_cities['город'].map(lambda x: coordinates_dict.get(x, (None, None))[1])



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [11]:
df_cleaned_cities = df_cleaned_cities.dropna(subset='долгота')
df_cleaned_cities = df_cleaned_cities.dropna(subset='широта')
df_cleaned_cities

Unnamed: 0,id,возраст,пол,город,широта,долгота
0,19,,женский,Saint Petersburg,27.770048,-82.635908
1,243,37.0,женский,Saint Petersburg,27.770048,-82.635908
3,545,38.0,мужской,Saint Petersburg,27.770048,-82.635908
4,847,,женский,Novosibirsk,54.967814,82.951599
5,852,,мужской,Saint Petersburg,27.770048,-82.635908
...,...,...,...,...,...,...
106662,1027162871,,мужской,Beograd,44.817813,20.456897
106666,1028552533,44.0,женский,Novokuznetsk,53.757590,87.135849
106670,1033087051,33.0,женский,Saint Petersburg,27.770048,-82.635908
106671,1034047294,32.0,женский,Saint Petersburg,27.770048,-82.635908


In [12]:
fig3 = px.scatter_geo(df_cleaned_cities,
                     lat='широта',
                     lon='долгота',
                     title='Распределение пользователей Нетология по городам',
                     projection="robinson"
                     )

fig3.update_layout(
    title_text='Распределение пользователей Нетология по городам',
    title_x=0.5,
    width=1200,
    height=800,
    geo=dict(
        showland=True,
        landcolor="rgb(243, 243, 243)",
        oceancolor="rgb(160, 220, 255)",
        showocean=True,
        lakecolor="rgb(160, 220, 255)",
    ),
)

fig3.show()

### Дашборд 2

In [13]:
fig1.update_layout(width=700, height=400)
fig2.update_layout(width=500, height=400, legend=dict(title='Пол', x=1, y=1))
fig3.update_layout(width=600, height=500)

In [20]:
import dash
from dash import dcc, html
from dash.dependencies import Input, Output
from pyngrok import ngrok

app2 = dash.Dash(__name__)
app2.layout = html.Div([
    html.H1("Интерактивный дашборд с анализом подписчиков сообщества Нетология в ВК", style={'text-align': 'center'}),

    html.Div([
        html.Div(dcc.Graph(figure=fig1), style={'padding': '0px', 'margin': '0'}),
        html.Div(dcc.Graph(figure=fig2), style={'padding': '0px', 'margin': '0'}),
        html.Div(dcc.Graph(figure=fig3), style={'padding': '0px', 'margin': '0'})
    ], style={
        'display': 'grid',
        'gridTemplateColumns': 'repeat(2, 1fr)',
        'gap': '0px',
        'padding': '0px',
        'gridTemplateRows': 'repeat(2, minmax(100px, auto))',
        'grid-row-gap': '0px'
    })
])

ngrok.set_auth_token("токен")

ngrok.kill()

public_url_2 = ngrok.connect(8050)
print("Доступно по ссылке:", public_url_2)

if __name__ == '__main__':
    from google.colab import output
    output.serve_kernel_port_as_window(8050)
    app2.run(port=8050, debug=True)

Доступно по ссылке: NgrokTunnel: "https://ebf8-34-106-60-16.ngrok-free.app" -> "http://localhost:8050"
Try `serve_kernel_port_as_iframe` instead. [0m


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

### Анализ подписчиков сообщества Яндекс Практикум через ВК API

In [53]:
GROUP_ID_2 = '176471180'
member_ids_2 = get_all_group_members(GROUP_ID_2, ACCESS_TOKEN)
len(member_ids_2)

104496

In [54]:
all_data_2 = get_user_ages_genders_and_cities(member_ids_2, ACCESS_TOKEN)
all_data_2

[{'id': 251, 'возраст': None, 'пол': 'мужской', 'город': None},
 {'id': 493, 'возраст': 39, 'пол': 'женский', 'город': None},
 {'id': 859, 'возраст': 31, 'пол': 'мужской', 'город': None},
 {'id': 1001, 'возраст': None, 'пол': 'мужской', 'город': 'Saint Petersburg'},
 {'id': 1215, 'возраст': None, 'пол': 'мужской', 'город': None},
 {'id': 1305, 'возраст': None, 'пол': 'мужской', 'город': 'Moscow'},
 {'id': 1470, 'возраст': None, 'пол': 'женский', 'город': 'Saint Petersburg'},
 {'id': 1566, 'возраст': 35, 'пол': 'женский', 'город': 'Moscow'},
 {'id': 1858, 'возраст': 36, 'пол': 'женский', 'город': 'Saint Petersburg'},
 {'id': 1909, 'возраст': 36, 'пол': 'женский', 'город': 'Saint Petersburg'},
 {'id': 2014, 'возраст': 36, 'пол': 'женский', 'город': 'Saint Petersburg'},
 {'id': 2175, 'возраст': None, 'пол': 'мужской', 'город': 'Krasnodar'},
 {'id': 2705, 'возраст': 36, 'пол': 'мужской', 'город': 'Saint Petersburg'},
 {'id': 2932, 'возраст': None, 'пол': 'мужской', 'город': 'Saint Petersbu

In [55]:
df_2 = pd.DataFrame(all_data_2, columns=["id", "возраст", "пол", "город"])
df_2

Unnamed: 0,id,возраст,пол,город
0,251,,мужской,
1,493,39.0,женский,
2,859,31.0,мужской,
3,1001,,мужской,Saint Petersburg
4,1215,,мужской,
...,...,...,...,...
104491,1032652982,,женский,
104492,1033087051,33.0,женский,Saint Petersburg
104493,1033655266,39.0,женский,Astana
104494,1034047294,32.0,женский,Saint Petersburg


In [56]:
df_2_cleaned_age = df_2[(df_2['возраст'].notna()) & (df_2['возраст'] <= 100)]

fig = px.histogram(df_2_cleaned_age, x='возраст', title='Распределение возраста среди подписчиков сообщества Яндекс Практикум в ВК')
fig.update_xaxes(
    title_text='Возраст',
    dtick=5,
    range=[14, 100]
)

fig.update_yaxes(
    title_text='Количество',
    dtick=500,
    range=[0, 2000]
)
fig.show()

In [57]:
df_2_cleaned_gender = df_2.dropna(subset='пол')

fig = px.pie(
    df_2_cleaned_gender,
    names='пол',
    title='Распределение по полу',
    hole=0.4
)

fig.update_layout(
    legend=dict(
        title='Пол',
        x=0.65,
        y=0.7
    )
)

fig.show()

In [58]:
df_2_cleaned_cities = df_2.dropna(subset='город')

# Уникальные города
unique_cities = df_2_cleaned_cities['город'].unique()[:500]

for city in unique_cities:
    if city not in coordinates_dict:
        coordinates = get_coordinates(city)
        if coordinates:
            coordinates_dict[city] = coordinates

df_2_cleaned_cities['широта'] = df_2_cleaned_cities['город'].map(lambda x: coordinates_dict.get(x, (None, None))[0])
df_2_cleaned_cities['долгота'] = df_2_cleaned_cities['город'].map(lambda x: coordinates_dict.get(x, (None, None))[1])
df_2_cleaned_cities



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0,id,возраст,пол,город,широта,долгота
3,1001,,мужской,Saint Petersburg,27.770048,-82.635908
5,1305,,мужской,Moscow,46.732388,-117.000165
6,1470,,женский,Saint Petersburg,27.770048,-82.635908
7,1566,35.0,женский,Moscow,46.732388,-117.000165
8,1858,36.0,женский,Saint Petersburg,27.770048,-82.635908
...,...,...,...,...,...,...
104489,1030296030,,мужской,Saint Petersburg,27.770048,-82.635908
104492,1033087051,33.0,женский,Saint Petersburg,27.770048,-82.635908
104493,1033655266,39.0,женский,Astana,1.563635,110.346048
104494,1034047294,32.0,женский,Saint Petersburg,27.770048,-82.635908


In [59]:
df_2_cleaned_cities = df_2_cleaned_cities.dropna(subset='долгота')
df_2_cleaned_cities = df_2_cleaned_cities.dropna(subset='широта')
df_2_cleaned_cities

Unnamed: 0,id,возраст,пол,город,широта,долгота
3,1001,,мужской,Saint Petersburg,27.770048,-82.635908
5,1305,,мужской,Moscow,46.732388,-117.000165
6,1470,,женский,Saint Petersburg,27.770048,-82.635908
7,1566,35.0,женский,Moscow,46.732388,-117.000165
8,1858,36.0,женский,Saint Petersburg,27.770048,-82.635908
...,...,...,...,...,...,...
104489,1030296030,,мужской,Saint Petersburg,27.770048,-82.635908
104492,1033087051,33.0,женский,Saint Petersburg,27.770048,-82.635908
104493,1033655266,39.0,женский,Astana,1.563635,110.346048
104494,1034047294,32.0,женский,Saint Petersburg,27.770048,-82.635908


In [60]:
fig = px.scatter_geo(df_2_cleaned_cities,
                     lat='широта',
                     lon='долгота',
                     title='Распределение подписчиков сообщества Яндекс Практикум в ВК по городам',
                     projection="robinson"
                     )

fig.update_layout(
    title_text='Распределение подписчиков сообщества Яндекс Практикум в ВК по городам',
    title_x=0.5,
    width=1200,
    height=800,
    geo=dict(
        showland=True,
        landcolor="rgb(243, 243, 243)",
        oceancolor="rgb(160, 220, 255)",
        showocean=True,
        lakecolor="rgb(160, 220, 255)",
    ),
)

fig.show()

### Дашборд 3

In [None]:
fig1.update_layout(width=700, height=400)
fig2.update_layout(width=500, height=400, legend=dict(title='Пол', x=1, y=1))
fig3.update_layout(width=600, height=500)

In [None]:
import dash
from dash import dcc, html
from dash.dependencies import Input, Output
from pyngrok import ngrok

app2 = dash.Dash(__name__)
app2.layout = html.Div([
    html.H1("Интерактивный дашборд с анализом подписчиков сообщества Нетология в ВК", style={'text-align': 'center'}),

    html.Div([
        html.Div(dcc.Graph(figure=fig1), style={'padding': '0px', 'margin': '0'}),
        html.Div(dcc.Graph(figure=fig2), style={'padding': '0px', 'margin': '0'}),
        html.Div(dcc.Graph(figure=fig3), style={'padding': '0px', 'margin': '0'})
    ], style={
        'display': 'grid',
        'gridTemplateColumns': 'repeat(2, 1fr)',
        'gap': '0px',
        'padding': '0px',
        'gridTemplateRows': 'repeat(2, minmax(100px, auto))',
        'grid-row-gap': '0px'
    })
])

ngrok.set_auth_token("токен")

ngrok.kill()

public_url_2 = ngrok.connect(8050)
print("Доступно по ссылке:", public_url_2)

if __name__ == '__main__':
    from google.colab import output
    output.serve_kernel_port_as_window(8050)
    app2.run(port=8050, debug=True)