In [1]:
# convert processed.json to dataframe
import json
import pandas as pd

# Load the raw course records. The file is only needed for json.load, so the
# handle is released before any further processing. The encoding is explicit
# because open() otherwise falls back to a platform-dependent default.
with open("./data/processed.json", encoding="utf-8") as data_file:
    json_data = json.load(data_file)

# Fields copied from every record, in the column order of the resulting frame.
columns = [
    "title",
    "affiliates",
    "key",
    "level",
    "num_of_projects",
    "tags",
    "average_rating",
    "count",
    "stats",
]

# One list per field; a record that lacks a field contributes None
# (the default returned by dict.get).
data = {name: [record.get(name) for record in json_data] for name in columns}

df = pd.DataFrame(data, columns=columns)

print(df)


# df = pd.read_json("./data/processed.json")

title  \
0                               Product Manager   
1                       AI for Business Leaders   
2     Intro to Machine Learning with TensorFlow   
3                                   UX Designer   
4                                Data Streaming   
..                                          ...   
58                      Artificial Intelligence   
59                      Front End Web Developer   
60          Full Stack Web Developer Nanodegree   
61         Machine Learning Engineer Nanodegree   
62  Secure and Private AI Scholarship Challenge   

                                           affiliates    key         level  \
0                                                  []  nd036      beginner   
1                                                  []  nd054  intermediate   
2   [{'image': '/assets/iridium/images/shared/part...  nd230      beginner   
3                                                  []  nd578      beginner   
4                                      

In [10]:
# Summary statistics for the rating column. The frame's column is named
# "average_rating" (singular); looking up "average_ratings" raises KeyError.
df1 = df["average_rating"]

df1.describe()

count    63.000000
mean      4.416854
std       0.877063
min       0.000000
25%       4.500000
50%       4.588435
75%       4.711656
max       5.000000
Name: average_rating, dtype: float64

In [2]:

# Two-column frame (title + rating) used only for the rating bar charts.
rating_columns = ("title", "average_rating")
sample_data = {column: data.get(column) for column in rating_columns}

df_ratings = pd.DataFrame(sample_data)

# Both charts share the same layout; only the selected rows differ.
bar_options = dict(figsize=(10, 5), kind="bar", legend=True, x="title", y="average_rating")

# 20 lowest-rated entries, then the 20 highest-rated ones.
lowest_rated = df_ratings.nsmallest(20, "average_rating")
lowest_rated.plot(**bar_options)

highest_rated = df_ratings.nlargest(20, "average_rating")
highest_rated.plot(**bar_options)


<matplotlib.axes._subplots.AxesSubplot at 0x11e35c990>

In [4]:

# Keep only the columns used by the rest of the analysis, then discard every
# row in which any of those columns is missing.
analysis_columns = [
    "title",
    "average_rating",
    "key",
    "count",
    "level",
    "stats",
    "num_of_projects",
    "tags",
]

new_df = df.loc[:, analysis_columns].dropna()

print(new_df)

title  average_rating        key  \
0                             Product Manager        4.568627      nd036   
1                     AI for Business Leaders        5.000000      nd054   
2   Intro to Machine Learning with TensorFlow        4.847826      nd230   
3                                 UX Designer        4.724324      nd578   
4                              Data Streaming        3.884058      nd029   
5                     Front End Web Developer        4.556561     nd0011   
6                    Full Stack Web Developer        4.536585     nd0044   
7                              Java Developer        2.906250      nd035   
8                          AI Product Manager        4.252252      nd088   
9                      Sensor Fusion Engineer        4.720930      nd313   
10                         Data Visualization        4.688889      nd197   
11                            Cloud Developer        4.376404     nd9990   
12                      Cloud DevOps Engineer       

In [5]:

# Average the numeric measures per level. The numeric columns are selected
# explicitly: calling .mean() on the whole grouped frame would also hit the
# text/list columns (title, key, stats, tags), which newer pandas versions
# refuse to average (TypeError) instead of silently dropping them.
# NOTE: the level labels have not been normalized at this point, so
# differently spelled variants of the same level form separate groups.
level_measures = ["average_rating", "count", "num_of_projects"]
df_levels = new_df.groupby(["level"])[level_measures].mean()
df_levels.head()


Unnamed: 0_level_0,average_rating,count,num_of_projects
level,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Advanced,4.72093,82.0,4.0
Beginner,4.563096,129.666667,3.666667
Intermediate,3.973497,144.0,4.333333
Intermediate,4.425632,178.0,5.0
advanced,4.515061,179.272727,1.909091


In [14]:

# The level column still contains several spellings of the same level
# (see the grouped output above); that is not handled in this cell.

# Keep only the rows that have at least one project (num_of_projects > 0).
has_projects = new_df["num_of_projects"] > 0
filtered_df = new_df[has_projects]

# First pass: best-rated rows on top.
# (written against pandas 1.0.0, where the API is .sort_values, not .sort)
rating_key = ["average_rating"]
sorted_df = filtered_df.sort_values(by=rating_key, ascending=False)

# Second pass: order by level first, then by rating, review count and number
# of projects, every key descending.
level_first_keys = ["level", "average_rating", "count", "num_of_projects"]
sorted_df = sorted_df.sort_values(by=level_first_keys, ascending=False)

sorted_df.head()

Unnamed: 0,title,average_rating,key,count,level,stats,num_of_projects,tags
50,Android Developer,4.734921,nd801,901,intermediate,"[{'rating': 5, 'count': 1616, 'percentage': 81...",14,"[android, App Development, Android App Develop..."
21,Become a Data Analyst,4.722488,nd002-ent,161,intermediate,"[{'rating': 5, 'count': 170, 'percentage': 81....",22,"[r, hadoop, python, d3]"
42,Intro to Self-Driving Cars,4.680217,nd113,224,intermediate,"[{'rating': 5, 'count': 288, 'percentage': 78....",8,"[Self Driving Car, Automation, Bayesian Thinki..."
24,Predictive Analytics for Business,4.641667,nd008t,106,intermediate,"[{'rating': 5, 'count': 91, 'percentage': 75.8...",8,"[Data Analytics, No Coding, Alyterx, Tableau, ..."
37,Deep Learning,4.639004,nd101,1043,intermediate,"[{'rating': 5, 'count': 1661, 'percentage': 76...",5,"[Neural Network, AI, Pytorch, Numpy, CNN, Mach..."


In [19]:
# Collapse the capitalized spellings of each level into its lowercase label.
# Any value that contains the capitalized word is overwritten in full.
capitalized_to_label = (
    ("Advanced", "advanced"),
    ("Intermediate", "intermediate"),
    ("Beginner", "beginner"),
)

for capitalized, label in capitalized_to_label:
    has_capitalized = sorted_df["level"].str.contains(capitalized)
    sorted_df.loc[has_capitalized, "level"] = label

sorted_df


Unnamed: 0,title,average_rating,key,count,level,stats,num_of_projects,tags
50,Android Developer,4.734921,nd801,901,intermediate,"[{'rating': 5, 'count': 1616, 'percentage': 81...",14,"[android, App Development, Android App Develop..."
21,Become a Data Analyst,4.722488,nd002-ent,161,intermediate,"[{'rating': 5, 'count': 170, 'percentage': 81....",22,"[r, hadoop, python, d3]"
42,Intro to Self-Driving Cars,4.680217,nd113,224,intermediate,"[{'rating': 5, 'count': 288, 'percentage': 78....",8,"[Self Driving Car, Automation, Bayesian Thinki..."
24,Predictive Analytics for Business,4.641667,nd008t,106,intermediate,"[{'rating': 5, 'count': 91, 'percentage': 75.8...",8,"[Data Analytics, No Coding, Alyterx, Tableau, ..."
37,Deep Learning,4.639004,nd101,1043,intermediate,"[{'rating': 5, 'count': 1661, 'percentage': 76...",5,"[Neural Network, AI, Pytorch, Numpy, CNN, Mach..."
22,iOS Developer,4.610306,nd003,301,intermediate,"[{'rating': 5, 'count': 456, 'percentage': 73....",14,"[swift, iOS, Swift, Make App, Make iOS App, Bu..."
31,React,4.588435,nd019,308,intermediate,"[{'rating': 5, 'count': 451, 'percentage': 76....",3,"[React, Front-end , Redux, React Native, React..."
5,Front End Web Developer,4.556561,nd0011,221,intermediate,"[{'rating': 5, 'count': 159, 'percentage': 71....",6,"[object oriented, object-oriented, UI, UX, Use..."
6,Full Stack Web Developer,4.536585,nd0044,164,intermediate,"[{'rating': 5, 'count': 109, 'percentage': 66....",5,"[computer science, front-end, frontend, javasc..."
17,Data Engineer,4.481229,nd027,283,intermediate,"[{'rating': 5, 'count': 197, 'percentage': 67....",6,"[data pipelines, data lakes, Data model, Apach..."


In [24]:
# Re-check the per-level averages of sorted_df. Only the numeric columns are
# averaged (the text/list columns cannot be, and newer pandas raises TypeError
# for them). The result is bound to a name because a bare expression in the
# middle of a cell is computed and then discarded without being displayed.
level_measures = ["average_rating", "count", "num_of_projects"]
level_means = sorted_df.groupby(["level"])[level_measures].mean()

# Order the rows again by level, rating, review count and number of projects
# (all descending) so the ordering reflects the current level labels.
sorted_df = sorted_df.sort_values(
    by=["level", "average_rating", "count", "num_of_projects"], ascending=False
)

# we're done massaging here
sorted_df

Unnamed: 0,title,average_rating,key,count,level,stats,num_of_projects,tags
50,Android Developer,4.734921,nd801,901,intermediate,"[{'rating': 5, 'count': 1616, 'percentage': 81...",14,"[android, App Development, Android App Develop..."
21,Become a Data Analyst,4.722488,nd002-ent,161,intermediate,"[{'rating': 5, 'count': 170, 'percentage': 81....",22,"[r, hadoop, python, d3]"
42,Intro to Self-Driving Cars,4.680217,nd113,224,intermediate,"[{'rating': 5, 'count': 288, 'percentage': 78....",8,"[Self Driving Car, Automation, Bayesian Thinki..."
24,Predictive Analytics for Business,4.641667,nd008t,106,intermediate,"[{'rating': 5, 'count': 91, 'percentage': 75.8...",8,"[Data Analytics, No Coding, Alyterx, Tableau, ..."
37,Deep Learning,4.639004,nd101,1043,intermediate,"[{'rating': 5, 'count': 1661, 'percentage': 76...",5,"[Neural Network, AI, Pytorch, Numpy, CNN, Mach..."
22,iOS Developer,4.610306,nd003,301,intermediate,"[{'rating': 5, 'count': 456, 'percentage': 73....",14,"[swift, iOS, Swift, Make App, Make iOS App, Bu..."
31,React,4.588435,nd019,308,intermediate,"[{'rating': 5, 'count': 451, 'percentage': 76....",3,"[React, Front-end , Redux, React Native, React..."
5,Front End Web Developer,4.556561,nd0011,221,intermediate,"[{'rating': 5, 'count': 159, 'percentage': 71....",6,"[object oriented, object-oriented, UI, UX, Use..."
15,Data Structures and Algorithms,4.547872,nd256,181,intermediate,"[{'rating': 5, 'count': 143, 'percentage': 76....",4,"[big O notiation, recursion, trees, maps, hash..."
6,Full Stack Web Developer,4.536585,nd0044,164,intermediate,"[{'rating': 5, 'count': 109, 'percentage': 66....",5,"[computer science, front-end, frontend, javasc..."


---

### beginner nanodegrees

In [31]:

# Rows of the sorted frame whose level is exactly "beginner"; preview the top of it.
is_beginner = sorted_df["level"].eq("beginner")
beginner_df = sorted_df.loc[is_beginner]
beginner_df.head()

Unnamed: 0,title,average_rating,key,count,level,stats,num_of_projects,tags
52,Android Basics,4.846098,nd803,1399,beginner,"[{'rating': 5, 'count': 2415, 'percentage': 88...",12,"[mobile, apps, development, android, App Devel..."
16,Programming for Data Science with R,4.833333,nd118,12,beginner,"[{'rating': 5, 'count': 10, 'percentage': 83.3...",3,"[R, nd118, SQL, Advance SQl, Basic Data Scienc..."
13,Intro to Machine Learning with PyTorch,4.748148,nd229,129,beginner,"[{'rating': 5, 'count': 105, 'percentage': 77....",4,"[pyhton, python, sql, predictive, supervised, ..."
38,Programming for Data Science with Python,4.702381,nd104,239,beginner,"[{'rating': 5, 'count': 195, 'percentage': 77....",3,"[Data Analytics, SQL, SQL join, Python, Numpy,..."
18,Marketing Analytics,4.696203,nd028,76,beginner,"[{'rating': 5, 'count': 63, 'percentage': 79.7...",8,"[digital marketing, google, Data Analytics, Da..."
10,Data Visualization,4.688889,nd197,45,beginner,"[{'rating': 5, 'count': 35, 'percentage': 77.7...",4,"[data visualization, Build Dashboard, Dashboar..."
30,Digital Marketing,4.681858,nd018,663,beginner,"[{'rating': 5, 'count': 895, 'percentage': 78....",8,"[No Coding, Social Media Marketing, Social Med..."
19,Introduction to Programming,4.62798,nd000,1215,beginner,"[{'rating': 5, 'count': 1660, 'percentage': 74...",5,"[python, css, html, javascript, HTML, CSS, Pyt..."
26,Business Analyst,4.570621,nd008,162,beginner,"[{'rating': 5, 'count': 259, 'percentage': 73....",9,[]
36,Data Foundations for Enterprise,4.568,nd100-ent,52,beginner,"[{'rating': 5, 'count': 102, 'percentage': 81....",4,[]


---

### intermediate nanodegrees

In [34]:
# Rows of the sorted frame whose level is exactly "intermediate".
is_intermediate = sorted_df["level"].eq("intermediate")
intermediate_df = sorted_df.loc[is_intermediate]
intermediate_df

Unnamed: 0,title,average_rating,key,count,level,stats,num_of_projects,tags
50,Android Developer,4.734921,nd801,901,intermediate,"[{'rating': 5, 'count': 1616, 'percentage': 81...",14,"[android, App Development, Android App Develop..."
21,Become a Data Analyst,4.722488,nd002-ent,161,intermediate,"[{'rating': 5, 'count': 170, 'percentage': 81....",22,"[r, hadoop, python, d3]"
42,Intro to Self-Driving Cars,4.680217,nd113,224,intermediate,"[{'rating': 5, 'count': 288, 'percentage': 78....",8,"[Self Driving Car, Automation, Bayesian Thinki..."
24,Predictive Analytics for Business,4.641667,nd008t,106,intermediate,"[{'rating': 5, 'count': 91, 'percentage': 75.8...",8,"[Data Analytics, No Coding, Alyterx, Tableau, ..."
37,Deep Learning,4.639004,nd101,1043,intermediate,"[{'rating': 5, 'count': 1661, 'percentage': 76...",5,"[Neural Network, AI, Pytorch, Numpy, CNN, Mach..."
22,iOS Developer,4.610306,nd003,301,intermediate,"[{'rating': 5, 'count': 456, 'percentage': 73....",14,"[swift, iOS, Swift, Make App, Make iOS App, Bu..."
31,React,4.588435,nd019,308,intermediate,"[{'rating': 5, 'count': 451, 'percentage': 76....",3,"[React, Front-end , Redux, React Native, React..."
5,Front End Web Developer,4.556561,nd0011,221,intermediate,"[{'rating': 5, 'count': 159, 'percentage': 71....",6,"[object oriented, object-oriented, UI, UX, Use..."
15,Data Structures and Algorithms,4.547872,nd256,181,intermediate,"[{'rating': 5, 'count': 143, 'percentage': 76....",4,"[big O notiation, recursion, trees, maps, hash..."
6,Full Stack Web Developer,4.536585,nd0044,164,intermediate,"[{'rating': 5, 'count': 109, 'percentage': 66....",5,"[computer science, front-end, frontend, javasc..."


---

### advanced nanodegrees

In [35]:
# Rows of the sorted frame whose level is exactly "advanced".
is_advanced = sorted_df["level"].eq("advanced")
advanced_df = sorted_df.loc[is_advanced]

advanced_df

Unnamed: 0,title,average_rating,key,count,level,stats,num_of_projects,tags
32,Mobile Web Specialist,4.807229,nd024,163,advanced,"[{'rating': 5, 'count': 287, 'percentage': 86....",3,[]
28,Self Driving Car Engineer,4.746765,nd013,520,advanced,"[{'rating': 5, 'count': 897, 'percentage': 82....",14,"[Computer Vision, Sensor Fusion, Localization,..."
9,Sensor Fusion Engineer,4.72093,nd313,82,advanced,"[{'rating': 5, 'count': 67, 'percentage': 77.9...",4,"[Lidar, Radar, Kalman Filters, Radar Calibrati..."
27,Machine Learning Engineer,4.533333,nd009t,247,advanced,"[{'rating': 5, 'count': 249, 'percentage': 72....",4,"[python, Machine Learning, Deep learning, AI, ..."
