# WEEK 10 CHALLENGE

## Project
Moodle Database: Educational Data Log Analysis

## Content
* Libraries
* Data
    * Connection to sql database
* Task 1: Moodle database schema understanding

## Libraries

In [1]:
# Importing the necessary libraries
import pandas as pd
import psycopg2
import numpy as np
from sqlalchemy import create_engine
import sys

## Data

### Most Important Tables (MIT)
Moodle database is complex - with more than 400 connected tables! In this project we are interested only in the subset of the tables. The most important tables we will consider in this challenge are (tables in bold are VIP)  

* **mdl_logstore_standard_log**
* **mdl_context**
* **mdl_user**
* **mdl_course**
* **mdl_modules**
* **mdl_course_modules**
* **mdl_course_modules_completion** 
* mdl_grade_items
* **mdl_grade_grades**
* mdl_grade_categories
* mdl_grade_items_history
* mdl_grade_grades_history
* mdl_grade_categories_history
* mdl_forum
* mdl_forum_discussions
* mdl_forum_posts


In [2]:
# store the important tables in a list
moodle_tables = ['mdl_logstore_standard_log', 'mdl_context',
                 'mdl_user', 'mdl_course ', 'mdl_modules',
                 'mdl_course_modules ', 'mdl_course_modules_completion',
                 'mdl_grade_items', 'mdl_grade_grades',
                 'mdl_grade_categories', 'mdl_grade_items_history',
                 'mdl_grade_grades_history', 'mdl_grade_categories_history',
                 'mdl_forum', 'mdl_forum_discussions', 'mdl_forum_posts'
                ]

### Connection to sql database

In [3]:
# to load ipython-sql
%load_ext sql

In [4]:
# To connect ipython-sql to the moodle database
%sql postgresql://postgres:password@localhost/moodle

In [5]:
# To connect sqlalchemy to the moodle database
engine = create_engine('postgresql://postgres:password@localhost/moodle')

In [6]:
class Connection():
    """
    This class is used to:
    1. get a connection with the moodle postgres database
    2. get the cursor
    3. close the connection
    """
    # import the postgresql library
    import psycopg2
    
    def __init__(self, user:str, database:str, host:str, port:str=None, password:str=None):
        self.user = user
        self.password = password
        self.host = host
        self.port = port
        self.database = database
        
        self.connect()
        
    def __repr__(self):
        return
    
    # function to get the connection
    def connect(self):
        try: 
            self.connection = psycopg2.connect(user = self.user,
                                          password = self.password,
                                          host = self.host,
                                          port = self.port,
                                          database = self.database)
            
            print("Connection to sql server is successful.")
            
        except (Exception, psycopg2.Error) as error :
            print ("Error while connecting to PostgreSQL", error)
            
        return self
    
    # function to create cursor
    def create_cursor(self):
        try:
            cursor = self.connection.cursor()
            print("Cursor is successfully created.")
        except:
            print("Error creating cursor.")
            
        return cursor

    # function to close the connection
    def close_connection(self):
        print("The connection is now closed.")
        return self.connection.close()


In [7]:
# getting a connection to the postgresql database
con = Connection(user='postgres', password = password, 
                 host='127.0.0.1', port = 5432, database='moodle')

# getting cursor
cursor = con.create_cursor()

Connection to sql server is successful.
Cursor is successfully created.


## Task One
* Download the database and write an SQL script to count 
    * the number of tables 
    * the number of records in each of the tables given in the MIT section
    * Number of quiz submissions by hour of day
    * Monthly usage time of learners who have confirmed and are not deleted
    * Count of log events per user for the following verbs: ['loggedin', 'viewed', 'started', ,'submitted', 'uploaded', 'updated', 'searched', 'resumed', 'answered', 'attempted', 'abandoned']
* Write a python class to pull 
    * Overall grade of learners for Number of forum posts


#### Number of Tables

In [8]:
%%sql 
SELECT COUNT(*) as tables FROM information_schema.tables 
WHERE table_schema = 'public';

 * postgresql://postgres:***@localhost/moodle
1 rows affected.


tables
448


#### Number of records in each of the tables given in the MIT section

In [9]:
for table in moodle_tables:
    print(f'\nTable {table}\n----------------------------')
    result = pd.read_sql_query(sql=f"SELECT COUNT(*) FROM {table}", 
                              con=engine)
    print(result)


Table mdl_logstore_standard_log
----------------------------
    count
0  417554

Table mdl_context
----------------------------
   count
0   4359

Table mdl_user
----------------------------
   count
0   1052

Table mdl_course 
----------------------------
   count
0     15

Table mdl_modules
----------------------------
   count
0     26

Table mdl_course_modules 
----------------------------
   count
0    290

Table mdl_course_modules_completion
----------------------------
   count
0   4483

Table mdl_grade_items
----------------------------
   count
0    113

Table mdl_grade_grades
----------------------------
   count
0   3643

Table mdl_grade_categories
----------------------------
   count
0     16

Table mdl_grade_items_history
----------------------------
   count
0    486

Table mdl_grade_grades_history
----------------------------
   count
0   7108

Table mdl_grade_categories_history
----------------------------
   count
0     46

Table mdl_forum
--------------------------

#### Number of quiz submissions by hour of day

In [10]:
%%sql
SELECT COUNT(id) AS submissions, EXTRACT(hour FROM to_timestamp(timecreated)) AS hour_of_day
FROM mdl_logstore_standard_log
WHERE action = 'submitted' AND component = 'mod_quiz'
GROUP BY hour_of_day;

 * postgresql://postgres:***@localhost/moodle
24 rows affected.


submissions,hour_of_day
29,0.0
7,1.0
14,2.0
10,3.0
18,4.0
17,5.0
34,6.0
46,7.0
69,8.0
95,9.0


#### Monthly usage time of learners who have confirmed and are not deleted

In [11]:
%%sql
SELECT COUNT(lastaccess - firstaccess) AS time_usage,
EXTRACT(month FROM to_timestamp(firstaccess)) AS months
FROM mdl_user 
WHERE confirmed = 1 AND deleted = 0
GROUP BY months 
ORDER  BY  time_usage;

 * postgresql://postgres:***@localhost/moodle
7 rows affected.


time_usage,months
27,6.0
31,7.0
58,3.0
111,5.0
140,4.0
227,2.0
450,1.0


#### Count of log events per user for the following verbs: ['loggedin', 'viewed', 'started', ,'submitted', 'uploaded', 'updated', 'searched', 'resumed', 'answered', 'attempted', 'abandoned']

In [12]:
# store the verbs in a list
actions = ['loggedin', 'viewed', 'started','submitted',
          'uploaded', 'updated', 'searched', 'resumed',
          'answered', 'attempted', 'abandoned']

In [13]:
# iterate through the verb list to get event count for each verb
for action in actions:
    print(f"Log Event: {action}\n----------------------")
    result = pd.read_sql_query(sql=f"SELECT userid, COUNT(action) AS event_count FROM mdl_logstore_standard_log \
                                     WHERE action = '{action}' \
                                     GROUP BY userid \
                                     ORDER BY event_count DESC \
                                     LIMIT 5", 
                               con=engine)
    print(f"{result}\n----------------------")

Log Event: loggedin
----------------------
   userid  event_count
0       2          169
1     246          113
2       3          107
3     369          100
4     165           91
----------------------
Log Event: viewed
----------------------
   userid  event_count
0       0         5305
1       3         4041
2       2         2492
3     246         2416
4     917         2243
----------------------
Log Event: started
----------------------
   userid  event_count
0     930           65
1     581           60
2     185           52
3     165           45
4     344           44
----------------------
Log Event: submitted
----------------------
   userid  event_count
0     930           63
1     581           59
2     369           42
3     344           39
4     165           37
----------------------
Log Event: uploaded
----------------------
   userid  event_count
0       2           21
1     246           14
2     347            8
3       3            6
4     383            5
-----

#### Write a python class to pull 
* Overall grade of learners for Number of forum posts

#### Python Class

In [14]:
class Forum(Connection):
    
    """
    This class is makes use of the Connection Constructor and 
    it is used to:
    1. get a connection with the moodle postgres database
    2. pull the overall grade of learners
    3. pull the number of forum posts
    """
    
    # import the pandas library
    import pandas as pd
    
    def __init__(self, user:str, database:str, host:str, port:str=None, password:str=None):
        
        super().__init__(user, database, host, port, password)
        
        self.user = user
        self.password = password
        self.host = host
        self.port = port
        self.database = database
        
    def __repr__(self):
        return "Overall Grade of Learners and Number of Forum Posts"
 
    def overall_grade(self):
        grades = pd.read_sql_query(sql="SELECT AVG(finalgrade) AS grade_avg, \
                                        COUNT(finalgrade) AS grade_counts, \
                                        SUM(finalgrade) AS grades_sum \
                                        FROM mdl_grade_grades",
                                   con=self.connection)
        return grades
    
    def forum_count(self):
        f_count = pd.read_sql_query(sql="SELECT COUNT(id) AS forum_counts \
                                         FROM mdl_forum_posts;", 
                                    con=self.connection)
        return f_count

In [15]:
# getting a connection to the postgresql database
grades_forum = Forum(user='postgres', password = password, 
                     host='127.0.0.1', port = 5432, database='moodle')

Connection to sql server is successful.


In [16]:
grades_forum

Overall Grade of Learners and Number of Forum Posts

In [17]:
# getting the overall grade
grades_forum.overall_grade()

Unnamed: 0,grade_avg,grade_counts,grades_sum
0,107.371718,2160,231922.91029


In [18]:
# getting the number of forum post
grades_forum.forum_count()

Unnamed: 0,forum_counts
0,131


In [19]:
# closing the connection to database
con.close_connection()

The connection is now closed.
