# Import libraries

In [81]:
import pandas as pd
import pyspark.sql.functions as f
from pyspark.sql import SparkSession
from pyspark.sql.functions import isnan, count, when, col, desc, udf, col, sort_array, asc, avg
from pyspark.sql.functions import sum as Fsum
from pyspark.sql.types import IntegerType
from pyspark.sql.window import Window

## Instantiate a Spark Session

In [82]:
# Obtain the notebook's Spark session under a descriptive application name.
spark = (
    SparkSession.builder
    .appName("Processing incident event log data")
    .getOrCreate()
)

## Read in the data set

In [83]:
# Load the incident event log as a Spark DataFrame.
# The first line of the file supplies the column names, and rows that cannot
# be parsed are dropped rather than raising (mode=DROPMALFORMED).
df = (
    spark.read
    .format("csv")
    .options(header="true", mode="DROPMALFORMED")
    .load("incident_event_log.csv")
)

In [84]:
# Show the column names and types Spark assigned. The reader above is given no
# schema and no inferSchema option, and every column is reported as string.
df.printSchema()

root
 |-- number: string (nullable = true)
 |-- incident_state: string (nullable = true)
 |-- active: string (nullable = true)
 |-- reassignment_count: string (nullable = true)
 |-- reopen_count: string (nullable = true)
 |-- sys_mod_count: string (nullable = true)
 |-- made_sla: string (nullable = true)
 |-- caller_id: string (nullable = true)
 |-- opened_by: string (nullable = true)
 |-- opened_at: string (nullable = true)
 |-- sys_created_by: string (nullable = true)
 |-- sys_created_at: string (nullable = true)
 |-- sys_updated_by: string (nullable = true)
 |-- sys_updated_at: string (nullable = true)
 |-- contact_type: string (nullable = true)
 |-- location: string (nullable = true)
 |-- category: string (nullable = true)
 |-- subcategory: string (nullable = true)
 |-- u_symptom: string (nullable = true)
 |-- cmdb_ci: string (nullable = true)
 |-- impact: string (nullable = true)
 |-- urgency: string (nullable = true)
 |-- priority: string (nullable = true)
 |-- assignment_group: 

## Filter new incidents that exceeded the target SLA 

In [93]:
# Rows in the 'New' state whose made_sla flag is true.
# Bind the filtered DataFrame itself: `.show()` only prints and returns None,
# so assigning its result would leave `exceeded_sla` empty.
# `== True` is intentional: it builds a Spark Column expression (`is True`
# would not). made_sla is a string column in the schema, so this relies on
# Spark coercing the comparison.
# NOTE(review): this treats made_sla == true as "exceeded the SLA"; confirm
# against the dataset's data dictionary.
exceeded_sla = df.filter((f.col('incident_state') == 'New') & (f.col('made_sla') == True))
exceeded_sla.show(vertical=True)

-RECORD 0----------------------------------
 number                  | INC0000045      
 incident_state          | New             
 active                  | true            
 reassignment_count      | 0               
 reopen_count            | 0               
 sys_mod_count           | 0               
 made_sla                | true            
 caller_id               | Caller 2403     
 opened_by               | Opened by  8    
 opened_at               | 29/2/2016 01:16 
 sys_created_by          | Created by 6    
 sys_created_at          | 29/2/2016 01:23 
 sys_updated_by          | Updated by 21   
 sys_updated_at          | 29/2/2016 01:23 
 contact_type            | Phone           
 location                | Location 143    
 category                | Category 55     
 subcategory             | Subcategory 170 
 u_symptom               | Symptom 72      
 cmdb_ci                 | ?               
 impact                  | 2 - Medium      
 urgency                 | 2 - M

In [95]:
# Number of records in the 'New' state whose made_sla flag is true
# (what this notebook counts as having exceeded the SLA).
exceeded_sla = (
    df
    .filter(
        (f.col('incident_state') == 'New')
        & (f.col('made_sla') == True)
    )
    .count()
)
exceeded_sla

36407

In [97]:
# Load the same CSV with pandas for a quick look at the raw rows.
# Needs `import pandas as pd` in the imports cell; without it this cell fails
# with a NameError on a fresh kernel.
data = pd.read_csv('incident_event_log.csv')
data

Unnamed: 0,number,incident_state,active,reassignment_count,reopen_count,sys_mod_count,made_sla,caller_id,opened_by,opened_at,...,u_priority_confirmation,notify,problem_id,rfc,vendor,caused_by,closed_code,resolved_by,resolved_at,closed_at
0,INC0000045,New,True,0,0,0,True,Caller 2403,Opened by 8,29/2/2016 01:16,...,False,Do Not Notify,?,?,?,?,code 5,Resolved by 149,29/2/2016 11:29,5/3/2016 12:00
1,INC0000045,Resolved,True,0,0,2,True,Caller 2403,Opened by 8,29/2/2016 01:16,...,False,Do Not Notify,?,?,?,?,code 5,Resolved by 149,29/2/2016 11:29,5/3/2016 12:00
2,INC0000045,Resolved,True,0,0,3,True,Caller 2403,Opened by 8,29/2/2016 01:16,...,False,Do Not Notify,?,?,?,?,code 5,Resolved by 149,29/2/2016 11:29,5/3/2016 12:00
3,INC0000045,Closed,False,0,0,4,True,Caller 2403,Opened by 8,29/2/2016 01:16,...,False,Do Not Notify,?,?,?,?,code 5,Resolved by 149,29/2/2016 11:29,5/3/2016 12:00
4,INC0000047,New,True,0,0,0,True,Caller 2403,Opened by 397,29/2/2016 04:40,...,False,Do Not Notify,?,?,?,?,code 5,Resolved by 81,1/3/2016 09:52,6/3/2016 10:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
141707,INC0120835,Closed,False,1,0,4,True,Caller 116,Opened by 12,16/2/2017 09:09,...,True,Do Not Notify,?,?,?,?,code 9,Resolved by 9,16/2/2017 09:53,16/2/2017 09:53
141708,INC0121064,Active,True,0,0,0,True,Caller 116,Opened by 12,16/2/2017 14:17,...,False,Do Not Notify,?,?,?,?,code 6,Resolved by 9,16/2/2017 16:38,16/2/2017 16:38
141709,INC0121064,Active,True,1,0,1,True,Caller 116,Opened by 12,16/2/2017 14:17,...,False,Do Not Notify,?,?,?,?,code 6,Resolved by 9,16/2/2017 16:38,16/2/2017 16:38
141710,INC0121064,Resolved,True,1,0,2,True,Caller 116,Opened by 12,16/2/2017 14:17,...,True,Do Not Notify,?,?,?,?,code 6,Resolved by 9,16/2/2017 16:38,16/2/2017 16:38
