In [1]:
import requests
import json
import pandas as pd


# Request course data with URL.

In [2]:
# Course-detail endpoint; the query flags ask for all course metas and skip inactive entries.
url = 'https://api.pwskills.com/v1/course/63a2eb428899436daf7eb489?withAllCourseMetas=true&ignoreInActive=true'

# requests applies no timeout by default, so bound the wait: a stalled
# connection would otherwise block the notebook indefinitely.
r = requests.get(url, timeout=30)
r

<Response [200]>

In [3]:
from datetime import datetime

# %b is the documented abbreviated-month directive; %h only works where the
# platform C library happens to provide it.
print(f"Last time runs on {datetime.now():%d %b, %Y}.")

Last time runs on 03 Mar, 2023.


## Save/Export course data in JSON format.

In [4]:
# Fail loudly when the request did not succeed: raising stops execution with a
# visible error, whereas exit() ends the kernel session.
if r.status_code != 200:
    raise RuntimeError(f'Course request failed with HTTP status {r.status_code}')

# The context manager closes the file handle as soon as the dump is written.
with open('../data/courses/_course_data.json', 'w') as fp:
    json.dump(r.json(), fp, indent=2)

In [5]:
# Get data as python dictionary: keep only the 'data' entry of the JSON response
data = r.json()['data']
# Show the top-level keys of the course record
data.keys()

dict_keys(['_id', 'isJobGuaranteeProgram', 'isJobAssistanceProgram', 'active', 'platformType', 'tags', 'labPlans', 'title', 'description', 'mode', 'seo', 'pricing', 'batches', 'faq', 'createdAt', 'updatedAt', 'img', 'categoryId', 'classTimings', 'mobilePricing', 'videoURL', 'instructorsDetails', 'courseMetas'])

# Course Overview

In [6]:
# Width used to left-align every label of the summary.
align = 22

# Price after discount: the 'IN' price minus 'discount' percent of it,
# rounded to a whole number.
list_price = data['pricing']['IN']
discount_pct = data['pricing']['discount']
course_price = round(list_price - (list_price * discount_pct / 100))

# Names of all instructors attached to the course.
inst_names = [person['name'] for person in data['instructorsDetails']]

# Remaining facts all come from the first 'courseMetas' entry.
first_meta = data['courseMetas'][0]
cert_bench = first_meta['certificateBenchmark']
lang = first_meta['overview']['language']
duration = first_meta['duration']

# One aligned "label value" line per fact, in display order.
for label, value in (
    ('Name of Course:', data['title']),
    ('Price of Course:', f'₹{course_price}'),
    ('Name of instructors:', inst_names),
    ('Certificate Benchmark:', f'{cert_bench}%'),
    ('Language of Course:', lang),
    ('Course duration:', duration),
):
    print(label.ljust(align), value)

Name of Course:        Data Science masters
Price of Course:       ₹2975
Name of instructors:   ['Krish Naik', 'Sudhanshu Kumar']
Certificate Benchmark: 75%
Language of Course:    hinglish
Course duration:       7-8 months


In [7]:
# Get course meta data: the first entry of the 'courseMetas' list
meta: dict = data['courseMetas'][0]
# Show which sections the meta record contains
meta.keys()

dict_keys(['instructors', '_id', 'certificateBenchmark', 'courseId', 'overview', 'curriculum', 'projects', 'createdAt', 'updatedAt', '__v', 'duration'])

## What can you learn from this course?

In [8]:
# Topics listed under the overview's 'learn' entry.
learn = meta['overview']['learn']

# Headline with the topic count, followed by one bullet per topic.
print(f'You can learn {len(learn)}+ different types of topics in this course.')

for topic in learn:
    print(f'  - {topic}')

You can learn 15+ different types of topics in this course.
  - Python
  - Statistics
  - Machine learning
  - Deep learning
  - Computer vision
  - Natural language processing
  - Big Data
  - Apache Spark
  - Apache Kafka
  - Data Analytics
  - PowerBI
  - Tableau
  - Databases
  - Data Science Workflow
  - Real Time Data Science Projects


## Projects in this course?

In [None]:
# Project entries stored in the course meta record
projects = meta['projects']

In [None]:
# A child entry carries a 'parent' key (renamed to 'parentId' here so it can be
# matched against the parents' ids); a top-level entry has no such key.
# Testing for the key itself, instead of counting how many keys an entry has,
# keeps the split correct if the API ever adds another field to the entries.
paren_proj = (pd.DataFrame([entry for entry in projects if 'parent' not in entry])
                .rename(columns={'_id': 'parentId', 'title': 'parentTitle'}))
child_proj = (pd.DataFrame([entry for entry in projects if 'parent' in entry])
                .rename(columns={'_id': 'childId', 'parent': 'parentId', 'title': 'childTitle'}))

In [None]:
# Inner join on the shared 'parentId': one row per child project, carrying its
# parent's title alongside.
project_df = pd.merge(paren_proj, child_proj, how='inner', on='parentId')
project_df.shape

(31, 4)

### Create a date column.

In [None]:
# Pull a "<day> <month>'23" stamp out of each child title; titles without one
# are left as missing values.
child_stamp = project_df['childTitle'].str.extract(r"(\d{1,2} \w{3,4}'23)", expand=False)
project_df['date'] = child_stamp

In [None]:
# Rows whose child title carried no date stamp.
null_date = project_df[project_df['date'].isna()]

# For exactly those rows, fall back to the stamp embedded in the parent title.
project_df.loc[null_date.index, 'date'] = (
    null_date['parentTitle'].str.extract(r"(\d{1,2} \w{3,4}'23)", expand=False)
)

In [None]:
# Convert date column data type. The unit is spelled out because pandas 2.x
# rejects the unit-less 'datetime64' alias with a TypeError; 'datetime64[ns]'
# works on both older and newer pandas.
project_df['date'] = project_df['date'].astype('datetime64[ns]')

### Filter titles in the dataset.

In [None]:
# Filter parentTitle: remove the date stamp, the '6 - ' numbering and the
# 'Python Project :-' prefix, then trim surrounding whitespace.
cleaned_parent = project_df['parentTitle']
cleaned_parent = cleaned_parent.str.replace(r"(\d{1,2} \w{3,4}'23)", '', regex=True)
cleaned_parent = cleaned_parent.str.replace('6 - ', '', regex=False)
cleaned_parent = cleaned_parent.str.replace(r'Python Project :\s?-  ', '', regex=True)
project_df['parentTitle'] = cleaned_parent.str.strip()

In [None]:
# Filter childTitle: remove the date stamp, then trim surrounding whitespace.
cleaned_child = project_df['childTitle'].str.replace(r"(\d{1,2} \w{3,4}'23)", '', regex=True)
project_df['childTitle'] = cleaned_child.str.strip()

### Project details.

In [None]:
# Headline counts distinct parent ids; the bullets list the distinct parent titles.
parent_count = project_df['parentId'].nunique()
print(f"This course has {parent_count} different types of (parent) topics for project which are:")

for parent_name in project_df['parentTitle'].unique():
    print(f'  - {parent_name}')

This course has 5 different types of (parent) topics for project which are:
  - Web Scrapping
  - Image Scrapper
  - ML Projects
  - Computer Vision Projects
  - NLP Projects


In [None]:
child_count = project_df['childId'].nunique()
print(f"And, there are {child_count} different (child) topics for project which are:")

# One section per parent title (in first-appearance order), listing that
# parent's child titles in their row order, with a blank line after each section.
for parent_name in project_df['parentTitle'].unique():
    print(f'  + {parent_name}')
    own_children = project_df.loc[project_df['parentTitle'] == parent_name, 'childTitle']
    for child_name in own_children:
        print(f"    - {child_name}")
    print()

And, there are 31 different (child) topics for project which are:
  + Web Scrapping
    - Web Scrapping introduction
    - Integration With Web Portal.
    - Integration With Rest Api, Web Portal And Mongo Db
    - Deployment On Web Portal On AWS Cloud

  + Image Scrapper
    - Image Scrapping Introduction
    - Image Scrapping Deployment
    - Integration With Rest Api, Web Portal And Mongo Db
    - Deployment On Web Portal On Azure Cloud

  + ML Projects
    - Fault detection in wafers based on sensor data.
    - Cement strength reg.
    - Credit card fraud.
    - Fraud detection
    - Income prediction
    - Phishing classifier
    - Visibility climate

  + Computer Vision Projects
    - Object Tracking Project
    - Image Classification with SOTA CNNs
    - Image to Text using OCRs
    - Vision based Attendance System
    - Sign Language Detection
    - Shredder Systems

  + NLP Projects
    - Movie Review using BERT
    - NER using BERT
    - POS Tagging with BERT
    - Text Gener

In [None]:
def get_curr_df(data) -> pd.DataFrame:
    """Build a parent/child curriculum table from the course record.

    Parameters
    ----------
    data : dict
        Course record. ``data['courseMetas'][0]['curriculum']`` must be a list
        of dicts providing the '_id', 'title', 'parent' and 'preview' entries
        that are merged on and dropped below.

    Returns
    -------
    pd.DataFrame
        One row per (parent, child) pair with the columns 'parentTitle',
        'childTitle' and 'date' (``datetime64[ns]``).
    """
    curr_dict: list[dict] = data['courseMetas'][0]['curriculum']

    curriculum = pd.DataFrame(curr_dict)

    # Self-join: attach to every entry the titles of the entries that name it
    # as their parent. Entries that nobody points at drop out (inner join).
    paired = curriculum.merge(curriculum[['parent', 'title']],
                              how='inner',
                              left_on='_id',
                              right_on='parent',
                              suffixes=('_parent', '_child'))

    # Keep only the two title columns, under descriptive names. Chained calls
    # return new frames, so nothing is mutated in place.
    df = (paired
          .drop(columns=['_id', 'preview', 'parent_parent', 'parent_child'])
          .rename(columns={'title_parent': 'parentTitle',
                           'title_child': 'childTitle'}))

    # Create date column from the "<day> <month>'23" stamp in the parent title.
    # expand=False yields a Series, and the unit is explicit because pandas 2.x
    # rejects the unit-less 'datetime64' alias.
    stamp = df['parentTitle'].str.extract(r"(\d{1,2} \w{3,5}'23)", expand=False)
    df['date'] = stamp.astype('datetime64[ns]')

    # Remove date sub-string from parentTitle: after at most two whitespace
    # splits the last piece is kept. This assumes the title starts with the
    # two-token date stamp -- TODO confirm for titles without one.
    df['parentTitle'] = (df['parentTitle'].str.split(n=2)
                         .str.get(-1)
                         .str.strip())

    return df


# Build the curriculum table from the course record
df = get_curr_df(data)
# Summarise its columns, non-null counts and dtypes
df.info()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 576 entries, 0 to 575
Data columns (total 3 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   parentTitle  576 non-null    object        
 1   childTitle   576 non-null    object        
 2   date         576 non-null    datetime64[ns]
dtypes: datetime64[ns](1), object(2)
memory usage: 18.0+ KB


In [None]:
def print_curr_details(curr_df: pd.DataFrame, pat: str) -> None:
    """Print each parent topic whose title contains ``pat``, with its date and children.

    Parameters
    ----------
    curr_df : pd.DataFrame
        Table with 'parentTitle', 'childTitle' and 'date' columns.
    pat : str
        Text to look for; compared case-insensitively as a plain substring of
        the parent title.

    Returns
    -------
    None
        Output goes to stdout: a ``+ <parent> - <date>`` line followed by one
        ``  - <child>`` line per child topic.
    """
    needle = pat.lower()

    for title in curr_df['parentTitle'].unique():
        if needle not in title.lower():
            continue

        # Select from the frame that was passed in rather than from a
        # notebook-level variable, so the function works on any table.
        topic_rows = curr_df[curr_df['parentTitle'] == title]

        # Get date: the mean of the topic's row dates.
        date = topic_rows['date'].mean()

        # %b is the documented abbreviated-month directive.
        print(f'+ {title} - {date:%d %b, %Y}')

        for child in topic_rows['childTitle']:
            print(f'  - {child}')

## Find/Get curriculum details by topics.

In [None]:
# Print some title for help
# sample(10) draws rows at random without a seed, so the hints differ between runs
print(df['parentTitle'].sample(10).unique())


# Waits for the user to type a value; print_curr_details matches it as a
# case-insensitive substring of the parent titles
curr_inp = input('\nEnter the (parent) topic to see (child) topics: ')

# Echo the entry, then list every matching parent topic with its child topics
print(f'\n>>> You entered: {curr_inp}\n')
print_curr_details(df, curr_inp)

['PowerBI PART-2' 'PowerBI' 'Clustering' 'Feature Engineering'
 'Neural Network A Simple Perception' 'Dimensionality Reduction'
 'String Objects' 'Image Classification Architectures']

>>> You entered: Engineering

+ Feature Engineering - 19 Mar, 2023
  - Handling Missing Data
  - Handling Imbalanced Data
  - Up-Sampling
  - Down-Sampling
  - Smote
  - Data Interpolation
  - Handling Outliers
  - Filter Method
  - Wrapper Method
  - Embedded Methods
  - Min-Max Scaling
  - Unit Vector
  - Feature Extraction
  - Pca (Principle Component Analysis)
  - Data Encoding
  - Nominal Encoding
  - One Hot Encoding
  - Ordinal Encoding
  - Label Encoding
  - Target Guided Ordinal Encoding
  - Covariance
  - Correlation Check
  - Pearson Correlation Coefficient
  - Spearman’S Rank Correlation
  -  Vif
