In [173]:
import pandas as pd
import numpy as np
import json

In [174]:
# Load the freetimelearning dump, which is read as one JSON document per line.
# Each line is parsed whole: json.loads ignores the trailing newline itself,
# whereas slicing off the last character would cut the closing character of
# the final record if the file does not end with a newline.
# Whitespace-only lines are skipped instead of raising a decode error.
# A list is built (not a one-shot generator) so the next cell can be re-run.
with open('./raw/freetimelearning.jsonl', 'r', encoding='utf-8') as file:
    freetimelearining_data = [
        json.loads(line) for line in file if line.strip()
    ]

In [175]:
# Build the freetimelearning DataFrame from the parsed records.
free_df = pd.DataFrame(data=freetimelearining_data)

In [176]:
# --- Clean the freetimelearning frame ---------------------------------------

# Remove the literal ' - Quiz(MCQ)' suffix and surrounding whitespace.
# regex=False is explicit: pandas < 2.0 defaulted to regex=True, where the
# parentheses form a group and the literal text would never match.
free_df['category'] = (
    free_df['category']
    .str.replace(' - Quiz(MCQ)', '', regex=False)
    .str.strip()
)

# The answer letter is the capital letter directly following a '(' in
# right_answer; expand=False yields a Series for the single capture group.
free_df['answer'] = free_df['right_answer'].str.extract(
    r'((?<=\()[A-Z])', expand=False
)

# Flag rows with no question text, or whose designated correct option is
# missing (option columns '1'..'4' are paired with letters A..D).
unusable = free_df['question'].isna()
for option_col, letter in zip(['1', '2', '3', '4'], 'ABCD'):
    unusable = unusable | (
        free_df[option_col].isna() & (free_df['answer'] == letter)
    )
free_df = free_df.loc[~unusable]

# Keep only the Computer Science section.
free_df = free_df.loc[free_df['section'] == 'Computer Science']

# Drop the first column by position. .copy() makes the result an independent
# frame so that later cells can add or rename columns without operating on a
# slice of the original.
free_df = free_df.iloc[:, 1:].copy()

In [177]:
# Name the nine columns by position, then move 'answer' ahead of the option
# columns and append an empty option 'E' column.
free_df.columns = [
    'category', 'question', 'right_answer', 'link',
    'A', 'B', 'C', 'D',
    'answer',
]
free_df = free_df.reindex(columns=[
    'category', 'question', 'right_answer', 'link',
    'answer',
    'A', 'B', 'C', 'D',
])
free_df['E'] = None

In [178]:
# Load the examveda dump, which is read as one JSON document per line.
# Lines are parsed whole (json.loads ignores the trailing newline); slicing off
# the last character would truncate the final record when the file has no
# terminating newline. Whitespace-only lines are skipped.
with open('raw/examveda.jsonl', 'r', encoding='utf-8') as file:
    examveda_data = [json.loads(line) for line in file if line.strip()]

In [179]:
# Build the examveda DataFrame from the parsed records.
examveda_df = pd.DataFrame(data=examveda_data)

In [180]:
# --- Tidy the raw examveda frame --------------------------------------------
examveda_df = examveda_df.drop_duplicates()
examveda_df = examveda_df.replace(np.nan, '')
# NOTE(review): NaN has already been replaced with '' on the line above, so
# this probably drops no columns — confirm whether it was meant to run first.
examveda_df = examveda_df.dropna(axis=1, how='all')

# Keep a row only when column '6' is empty and right_answer is neither '0'
# nor the empty string.
keep_rows = (
    (examveda_df['6'] == '')
    & (examveda_df['right_answer'] != '0')
    & (examveda_df['right_answer'] != '')
)
examveda_df = examveda_df.loc[keep_rows]

# Positional selection: the first ten columns minus the very first one.
examveda_df = examveda_df.iloc[:, 1:10]

In [181]:
# Split 'category' on '-' and keep the first two pieces as helper columns.
category_parts = examveda_df['category'].str.split(pat='-', expand=True)
for position, helper_col in enumerate(['cat', 'sec']):
    examveda_df[helper_col] = category_parts[position]

In [182]:
# Blank out the two generic top-level names in 'cat'.
# The result is assigned back to the column instead of calling
# replace(inplace=True) on the examveda_df['cat'] selection: that chained
# in-place form emits a FutureWarning in pandas 2.2 and, with Copy-on-Write
# enabled, leaves examveda_df unchanged.
examveda_df['cat'] = examveda_df['cat'].replace(
    ['Computer Fundamentals', 'Computer Science'],
    ['', ''],
)

In [183]:
# Final category: use 'cat' when it is truthy (non-empty), otherwise 'sec'.
examveda_df['category'] = examveda_df.apply(
    lambda row: row['cat'] or row['sec'],
    axis=1,
)
# The helper columns are no longer needed.
examveda_df = examveda_df.drop(columns=['cat', 'sec'])

# Map the answer numbers '1'..'5' to the option letters 'A'..'E'; any other
# value in right_answer is carried over unchanged.
number_to_letter = {'1': 'A', '2': 'B', '3': 'C', '4': 'D', '5': 'E'}
examveda_df['answer'] = examveda_df['right_answer'].replace(number_to_letter)

In [184]:
# Name the ten columns by position, then move 'answer' ahead of the option
# columns so the layout is category, question, right_answer, link, answer, A..E.
examveda_df.columns = [
    'category', 'question', 'right_answer', 'link',
    'A', 'B', 'C', 'D', 'E',
    'answer',
]
examveda_df = examveda_df.reindex(columns=[
    'category', 'question', 'right_answer', 'link',
    'answer',
    'A', 'B', 'C', 'D', 'E',
])

In [185]:
# Stack both sources into one frame. reset_index() moves each frame's previous
# row labels into an 'index' column. ignore_index=True then gives the combined
# frame unique row labels; without it both halves are numbered from 0, so any
# label-based operation on the result would hit rows from both sources.
df = pd.concat(
    [
        free_df.reset_index(),
        examveda_df.reset_index(),
    ],
    ignore_index=True,
)

In [186]:
# Remove the raw answer text and the 'index' column from the combined frame.
df = df.drop(columns=['right_answer', 'index'])

In [187]:
# Normalise category spellings.
# The result is assigned back to the column instead of calling
# replace(inplace=True) on the df['category'] selection: that chained in-place
# form emits a FutureWarning in pandas 2.2 and, with Copy-on-Write enabled,
# leaves df unchanged.
df['category'] = df['category'].replace({
    'Javascript': 'JavaScript',
    'MS Excel': 'Microsoft Excel',
    'MS Word': 'Microsoft Word',
    'Power Point': 'Microsoft PowerPoint',
    'Java Program': 'Java',
})

In [188]:
# Replace every missing value in the combined frame with an empty string.
df = df.replace(to_replace=np.nan, value='')

In [189]:
# Discard questions whose designated correct option (A..E) has empty text.
#
# Rows are filtered with a boolean mask rather than dropped by index label:
# drop(index=...) removes *every* row carrying a given label, so if the frame
# has repeated row labels (e.g. two sources each numbered from 0 before being
# concatenated) valid rows sharing a label with a bad row would be lost too.
empty_correct_option = (df['A'] == '') & (df['answer'] == 'A')
for letter in 'BCDE':
    empty_correct_option = empty_correct_option | (
        (df[letter] == '') & (df['answer'] == letter)
    )
df = df.loc[~empty_correct_option]

In [190]:
# Order the rows by category.
df = df.sort_values(by='category', axis=0)

In [191]:
# df['category'].value_counts().to_excel('categories.xlsx')
# df.to_excel('data.xlsx', index=False)

In [193]:
# Number of questions per category; value_counts() lists the most frequent first.
df['category'].value_counts()

category
Java                                  2951
C Program                             2395
Data Science                          1795
Computer Fundamental Miscellaneous    1417
Database                              1332
                                      ... 
Pytorch                                 15
Augmented Reality                       14
Materialize CSS                         10
jQuery                                  10
Ajax                                     7
Name: count, Length: 148, dtype: int64

In [192]:
# Display the combined, category-sorted question frame.
df

Unnamed: 0,category,question,link,answer,A,B,C,D,E
5689,.Net,garbage collector is a?,https://www.freetimelearning.com/online-quiz/d...,D,The Boolean data type,is unsigne has two states,is displayed by the program as yes or no.,Both (A) and (B),
5699,.Net,The____namespace is the root of all namespaces...,https://www.freetimelearning.com/online-quiz/d...,A,System,System.Root,System.Collections,System.Globalization,
5698,.Net,State whether the following statements about ....,https://www.freetimelearning.com/online-quiz/d...,B,"i-True, ii-True, iii-True","i-True, ii- True, iii-False","i-False, ii-True, iii-True","i-True, ii-False, iii-True",
5697,.Net,State which of the following statements about ...,https://www.freetimelearning.com/online-quiz/d...,A,i and iii only,i and ii only,ii and iii only,None of the Above,
5696,.Net,The______namespace includes classes and interf...,https://www.freetimelearning.com/online-quiz/d...,D,system,system.data,system.drawing,system.collection,
...,...,...,...,...,...,...,...,...,...
8863,jQuery,jQuery code to set the background color of al...,https://www.freetimelearning.com/online-quiz/j...,B,"$(“span”).style(“background-color”,”blue”);","$(“span”).css(“background-color”,”blue”);","$(“span”).manipulate(“background-color”,”blue”);","$(“span”).layout(“background-color”,”blue”);",
8865,jQuery,Select all elements that contain the specified...,https://www.freetimelearning.com/online-quiz/j...,A,:selects(),:contains(),Both,None of the above,
8866,jQuery,jQuery method is used to perform an asynchrono...,https://www.freetimelearning.com/online-quiz/j...,C,jQuery.ajaxAsync(),jQuery.ajaxSetup(),jQuery.ajax(),None of the above,
8857,jQuery,Which of the following Jquery library extension?,https://www.freetimelearning.com/online-quiz/j...,C,,,.js,,
