## Анализ ЦА

#### Международный опыт

Парсинг страницы https://www.open2study.com/statistics/coursera-statistics/, содержащей статистику о целевой аудитории (ЦА) платформы Coursera.

In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
import re

def get_soup(url, timeout=10):
    """Download ``url`` and return its HTML parsed with BeautifulSoup.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` may block indefinitely.

    Returns:
        A ``BeautifulSoup`` object built with the ``html.parser`` backend when
        the server answers with status 200. ``None`` (after printing a
        message) when the request fails or another status code is returned.
    """
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        # Report network-level failures the same way as bad status codes so
        # that callers only ever have to handle the ``None`` result.
        print(f"Ошибка при запросе {url}: {exc}")
        return None

    if response.status_code != 200:
        print(f"Ошибка при запросе {url}: {response.status_code}")
        return None

    return BeautifulSoup(response.text, 'html.parser')

url = "https://www.open2study.com/statistics/coursera-statistics/"

# Download the page and collect every <table> element found on it.
soup = get_soup(url)
if soup is None:
    # get_soup() returns None when the request fails; stop here with a clear
    # message instead of an AttributeError on the find_all() call below.
    raise RuntimeError(f"Не удалось загрузить страницу {url}")
tables = soup.find_all('table')

In [2]:
len(tables)

14

In [3]:
def get_data(table):
    """Extract the text of every cell of an HTML table, row by row.

    Args:
        table: A parsed ``<table>`` element exposing ``find_all`` (for
            example a BeautifulSoup ``Tag``).

    Returns:
        A list with one entry per ``<tr>`` element. Each entry is the list of
        whitespace-stripped texts of that row's ``<td>``/``<th>`` cells, in
        document order. Rows without cells yield an empty list, so the row
        positions of the source table are preserved.
    """
    # Rows are read generically: no assumption is made about how many cells a
    # row has, so header rows, single-cell rows and empty rows are all safe.
    return [
        [cell.get_text(strip=True) for cell in row.find_all(['td', 'th'])]
        for row in table.find_all('tr')
    ]

In [4]:
# Second table on the page: the first scraped row holds the column headers,
# the remaining rows hold the values.
learners_column = 'Number Of Coursera Learners'

data = get_data(tables[1])
df = pd.DataFrame(data=data[1:], columns=data[0])

# Strip the " million" suffix so the column can be stored as floats.
df[learners_column] = (
    df[learners_column]
    .str.replace(' million', '')
    .astype(float)
)
df

Unnamed: 0,Year,Number Of Coursera Learners
0,2023,142.0
1,2022,118.0
2,2021,92.0
3,2020,71.0
4,2019,44.0
5,2018,35.0
6,2017,28.0
7,2016,21.0


In [5]:
import plotly.express as px

# Plot the rows in reverse order of the scraped table.
fig = px.bar(
    df.iloc[::-1],
    x='Year',
    y='Number Of Coursera Learners',
    title='Рост количества учеников платформы Coursera (млн чел.)',
)

fig.show()

In [6]:
# Third table on the page: the first scraped row holds the column headers,
# the remaining rows hold the values.
share_column = 'Share Of Coursera users'

data2 = get_data(tables[2])
df2 = pd.DataFrame(data=data2[1:], columns=data2[0])

# Drop the percent sign so the shares can be stored as integers.
df2[share_column] = (
    df2[share_column]
    .str.replace('%', '')
    .astype(int)
)
df2

Unnamed: 0,Age,Share Of Coursera users
0,18 to 24 Years,21
1,25 to 34 Years,37
2,35 to 44 Years,22
3,45 to 54 Years,10
4,55 to 64 Years,5
5,Over 65 Years,3


In [7]:
# One bar per age bracket, in the order the rows appear in df2.
fig = px.bar(
    df2,
    x='Age',
    y='Share Of Coursera users',
    title='Распределение пользователей Coursera по возрастам',
)

fig.show()

In [8]:
# Fourth table on the page: the first scraped row holds the column headers,
# the remaining rows hold the values.
users_column = 'Number Of Coursera Users'

data3 = get_data(tables[3])
df3 = pd.DataFrame(data=data3[1:], columns=data3[0])

# Expand the "K" suffix into three zeros, then store the counts as integers.
df3[users_column] = (
    df3[users_column]
    .str.replace('K', '000')
    .astype(int)
)
df3

Unnamed: 0,Region,Number Of Coursera Users,Share Of Coursera Users
0,North America,17000,31%
1,Europe,12000,21%
2,Middle East,3000,5%
3,India,7000,13%
4,Asia Pacific,7000,13%
5,Africa,4000,8%
6,Latin America,5000,9%


In [9]:
# Hand-picked (latitude, longitude) pair used to place each region's marker.
region_coords = {
    'North America': (45, -100),
    'Europe': (50, 10),
    'Middle East': (25, 45),
    'India': (20, 77),
    'Asia Pacific': (20, 120),
    'Africa': (0, 20),
    'Latin America': (-20, -60),
}

# Element 0 of every pair goes to 'Latitude', element 1 to 'Longitude'.
# A region missing from region_coords raises KeyError.
for coord_index, coord_column in enumerate(('Latitude', 'Longitude')):
    df3[coord_column] = df3['Region'].map(
        lambda region, i=coord_index: region_coords[region][i]
    )

map_title = 'Распределение пользователей Coursera по регионам'

# Marker size and marker colour both encode the number of users.
fig = px.scatter_geo(
    df3,
    lat='Latitude',
    lon='Longitude',
    size='Number Of Coursera Users',
    hover_name='Region',
    title=map_title,
    projection="robinson",
    color='Number Of Coursera Users',
    color_continuous_scale=px.colors.sequential.Plasma,
    size_max=40,
)

# Land / water styling of the base map.
geo_style = dict(
    showland=True,
    landcolor="rgb(243, 243, 243)",
    oceancolor="rgb(160, 220, 255)",
    showocean=True,
    lakecolor="rgb(160, 220, 255)",
)

fig.update_layout(
    title_text=map_title,
    title_x=0.5,
    width=1200,
    height=800,
    geo=geo_style,
)

fig.show()

In [10]:
# Sixth table on the page: the first scraped row holds the column headers,
# the remaining rows hold the values.
percentage_column = 'Percentage Of Coursera Learners'

data4 = get_data(tables[5])
df4 = pd.DataFrame(data=data4[1:], columns=data4[0])

# Drop the percent sign so the percentages can be stored as integers.
df4[percentage_column] = (
    df4[percentage_column]
    .str.replace('%', '')
    .astype(int)
)
df4

Unnamed: 0,Primary Goal To Enroll In Recent Course/ Program,Percentage Of Coursera Learners
0,Get better at skills to get hired for their fi...,24
1,Wanted to switch to a different role by develo...,29
2,Working on developing their skills to advance ...,32
3,Learn more about topics they are interested in.,15


In [11]:
column_name = 'Primary Goal To Enroll In Recent Course/ Program'
values_name = 'Percentage Of Coursera Learners'

# One slice per enrolment goal, sized by the percentage of learners.
fig = px.pie(
    df4,
    values=values_name,
    names=column_name,
    # The chart title is the goal column's own header, so reuse the variable
    # instead of repeating the text in a placeholder-less f-string.
    title=column_name,
    hover_data=[values_name],
)

# Move the legend next to the pie and give it a heading.
fig.update_layout(
    legend_title_text='Categories',
    legend=dict(
        x=0.8,
        y=0.7,
        xanchor='left',
        yanchor='middle',
    ),
)
fig.show()