# Test Data Generation: Insights Tables Class

**Author**: *Jordyn Leonauskas (updated by Christian)*. **Affiliation**: *Kwantum Edu Analytics*. **Last Modified**: *2/6/2023*.

This OEA test data generation class notebook generates fictitious Insights roster tables, as seen in the Microsoft Education Insights module. This notebook is needed to successfully run the J_insights_test_data_gen_demo notebook.

This class notebook primarily leans on the use of the OEA_py class notebook, Faker python package, and already-generated base-truth tables to produce the following **22** Insights module roster tables (**21** generated, plus RefDefinition, which is landed as-is):

 1. **Course**
 2. **CourseSubject**
 3. **CourseGradeLevel**
 4. **Enrollment**
 5. **Organization**
 6. **Person**
 7. **PersonDemographic**
 8. **PersonDemographicEthnicity**
 9. **PersonDemographicPersonFlag**
 10. **PersonDemographicRace**
 11. **PersonEmailAddress**
 12. **PersonIdentifier**
 13. **PersonOrganizationRole**
 14. **PersonPhoneNumber**
 15. **PersonRelationship**
 16. **RefDefinition** *(Note: This CSV is landed from GitHub as an ungenerated base-truth table.)*
 17. **Section**
 18. **SectionGradeLevel**
 19. **SectionSession**
 20. **SectionSubject**
 21. **Session**
 22. **SourceSystem**

In [1]:
import logging
import random
from tokenize import Ignore
from faker import Faker
import pandas as pd
import datetime as dt
import numpy as np

class InsightsDataGen():
    def __init__(self, startdate='2022-01-03T00:00:00', enddate='2022-06-03T00:00:00'):
        """Create the empty Insights tables and load the base-truth tables.

        Args:
            startdate: Stored as self.startdate; the generator methods copy it
                into the FirstSeenDateTime and start-date columns.
            enddate: Stored as self.enddate; the generator methods copy it
                into the LastSeenDateTime and end-date columns.

        Uses the names `oea`, `requests` and `logger`, which this cell does not
        import (presumably they come from the OEA_py class notebook -- confirm).
        """
        self.startdate = startdate
        self.enddate = enddate

        self.faker = Faker('en_US')

        # set current datetime for rundate folder for writing out files
        self.currentDateTime = dt.datetime.now().strftime("%Y-%m-%d %H-%M-%S")

        def empty_table(*columns):
            # Zero-row frame with one object-typed column per given name.
            return pd.DataFrame({column: [] for column in columns}, dtype=object)

        # initialize dfs for each Insights table to be generated
        self.M365_course = empty_table(
            'Id', 'SourceSystemId', 'ExternalId', 'FirstSeenDateTime', 'LastSeenDateTime',
            'Name', 'OrganizationId', 'IsActiveInSession', 'Code', 'AcademicYearSessionId',
        )
        self.M365_coursesubject = empty_table(
            'Id', 'CourseId', 'RefAcademicSubjectId', 'FirstSeenDateTime',
        )
        self.M365_coursegradelevel = empty_table(
            'Id', 'CourseId', 'RefGradeLevelId', 'FirstSeenDateTime',
        )
        self.M365_enrollment = empty_table(
            'Id', 'SourceSystemId', 'ExternalId', 'FirstSeenDateTime', 'LastSeenDateTime',
            'PersonId', 'SectionId', 'RefSectionRoleId', 'IsActiveInSession',
            'IsPrimaryStaffForSection', 'EntryDate', 'ExitDate',
        )
        self.M365_organization = empty_table(
            'Id', 'SourceSystemId', 'ExternalId', 'FirstSeenDateTime', 'LastSeenDateTime',
            'Name', 'Identifier', 'RefOrganizationTypeId', 'ParentOrganizationId',
        )
        self.M365_person = empty_table(
            'Id', 'FirstSeenDateTime', 'LastSeenDateTime', 'Surname', 'GivenName', 'MiddleName',
            'PreferredSurname', 'PreferredGivenName', 'PreferredMiddleName',
        )
        self.M365_persondemographic = empty_table(
            'Id', 'PersonId', 'FirstSeenDateTime', 'LastSeenDateTime', 'RefSexId',
            'BirthDate', 'BirthCity', 'BirthState', 'BirthCountryCode',
        )
        self.M365_persondemographicethnicity = empty_table(
            'Id', 'PersonId', 'RefEthnicityId', 'FirstSeenDateTime', 'LastSeenDateTime',
        )
        self.M365_persondemographicpersonflag = empty_table(
            'Id', 'PersonId', 'RefPersonFlag', 'FirstSeenDateTime', 'LastSeenDateTime',
        )
        self.M365_persondemographicrace = empty_table(
            'Id', 'PersonId', 'RefRaceId', 'FirstSeenDateTime', 'LastSeenDateTime',
        )
        self.M365_personemailaddress = empty_table(
            'Id', 'PersonId', 'EmailAddress', 'PriorityOrder', 'RefEmailAddressTypeId',
            'FirstSeenDateTime',
        )
        self.M365_personidentifier = empty_table(
            'Id', 'PersonId', 'SourceSystemId', 'RefIdentifierTypeId', 'Identifier',
            'FirstSeenDateTime', 'IsPresentInSource',
        )
        self.M365_personorganizationrole = empty_table(
            'Id', 'SourceSystemId', 'ExternalId', 'FirstSeenDateTime', 'LastSeenDateTime',
            'OrganizationId', 'PersonId', 'RefRoleId', 'SessionId', 'IsActiveInSession',
            'RoleStartDate', 'RoleEndDate', 'IsPrimary', 'RefGradeLevelId',
        )
        self.M365_personphonenumber = empty_table(
            'Id', 'PersonId', 'PhoneNumber', 'PriorityOrder', 'RefPhoneNumberTypeId',
            'FirstSeenDateTime',
        )
        # Column name fixed from the misspelled 'RelatedPerssonId'; tables are
        # written without a header row, so the CSV output is unaffected.
        self.M365_personrelationship = empty_table(
            'Id', 'PersonId', 'RelatedPersonId', 'RefPersonRelationshipId', 'FirstSeenDateTime',
        )
        self.M365_section = empty_table(
            'Id', 'SourceSystemId', 'ExternalId', 'FirstSeenDateTime', 'LastSeenDateTime',
            'Name', 'OrganizationId', 'CourseId', 'Code', 'Location',
        )
        self.M365_sectiongradelevel = empty_table(
            'Id', 'SectionId', 'RefGradeLevelId', 'FirstSeenDateTime',
        )
        self.M365_sectionsession = empty_table(
            'Id', 'SectionId', 'SessionId', 'FirstSeenDateTime', 'LastSeenDateTime',
            'IsActiveInSession',
        )
        self.M365_sectionsubject = empty_table(
            'Id', 'SectionId', 'RefAcademicSubjectId', 'FirstSeenDateTime',
        )
        self.M365_session = empty_table(
            'Id', 'SourceSystemId', 'ExternalId', 'FirstSeenDateTime', 'LastSeenDateTime',
            'Name', 'RefSessionTypeId', 'RefAcademicYearId', 'StartDate', 'EndDate',
            'ParentSessionId',
        )
        self.M365_sourcesystem = empty_table(
            'Id', 'Name', 'FirstSeenDateTime', 'LastSeenDateTime',
        )

        # base-truth tables, converted to pandas so they can be iterated row by row
        sourcepath = 'stage1/Transactional/test_data/v0.1/'
        self.students = oea.load_csv(sourcepath + 'base_students/', header=True).toPandas()
        self.schools = oea.load_csv(sourcepath + 'base_schools/', header=True).toPandas()
        self.courses = oea.load_csv(sourcepath + 'base_courses/', header=True).toPandas()
        self.sections = oea.load_csv(sourcepath + 'base_sections/', header=True).toPandas()
        self.enrollment = oea.load_csv(sourcepath + 'base_enrollment/', header=True).toPandas()
        # land the base refdef CSV unless already exists
        refdef_exists = oea.path_exists(sourcepath + 'base_refdef/')
        if refdef_exists:
            logger.info('base_refdef CSV already exists.')
        else:
            # NOTE: subject to change depending on the directory of Insights module test data gen kit
            # (specifically, the location of the base refdef file).
            # NOTE(review): this URL looks like a GitHub directory page rather than a raw CSV
            # file -- confirm that the landed content really is the refdef CSV.
            response = requests.get('https://github.com/microsoft/OpenEduAnalytics/tree/main/modules/module_test_data_generation_kit/notebook/Insights_module')
            # Fail here rather than landing an HTTP error page as the refdef CSV.
            response.raise_for_status()
            data = response.text
            oea.land(data, 'test_data/v0.1/base_refdef', 'base_insights_refdef.csv', oea.SNAPSHOT_BATCH_DATA)
        # refdef is kept as loaded (no toPandas); the generators query it with filter/collect
        self.refdef = oea.load_csv(sourcepath + 'base_refdef/', header=True)
        # single source-system id and session id shared by all generated tables
        self.sourcesystemid = self.faker.uuid4()
        self.sessionid = self.faker.uuid4()

    def genInsights(self,startdate='2022-01-01T00:00:00',enddate='2022-06-01T00:00:00'):
        self.startdate = startdate
        self.enddate = enddate
        self.genSourceSystem()
        self.genSectionSession()
        self.genSession()
        self.genOrganization()
        self.genSection()
        self.genSectionGradeLevel()
        self.genSectionSubject()
        self.genCourse()
        self.genCourseSubject()
        self.genCourseGradeLevel()
        self.genPersonDemographicEthnicity()
        self.genPersonDemographicPersonFlag()
        self.genPersonDemographicRace()
        self.genPersonDemographic()
        self.genPerson()
        self.genPersonPhoneNumber()
        self.genPersonEmailAddress()
        self.genPersonRelationship()
        self.genPersonIdentifier()
        self.genRefDefinition()
        self.genEnrollment() # <- this function may take a while depending on size of base_enrollment table
        self.genPersonOrganizationRole() # <- this function may take a while depending on size of base_enrollment tablethis function may take a while depending on size of base_enrollment table

    def genSourceSystem(self):
        id = self.sourcesystemid
        name = 'Source System'
        firstseendatetime = self.startdate
        lastseendatetime = self.enddate
        self.M365_sourcesystem.loc[len(self.M365_sourcesystem.index)] = [id,name,firstseendatetime,lastseendatetime]
        self.writetofile('SourceSystem', self.M365_sourcesystem)
    
    def genRefDefinition(self):
        """Write the landed base RefDefinition table out with the generated tables.

        Almost identical to writetofile, except that self.refdef is written
        directly instead of first being converted from pandas.
        """
        # NOTE(review): 'RefDefintion' looks like a misspelling of 'RefDefinition';
        # left unchanged because downstream readers may depend on this path.
        genfilepath = 'stage1/Transactional/test_data/v0.1/M365_gen/RefDefintion/snapshot_batch_data/rundate='+self.currentDateTime
        # na.drop returns a new DataFrame rather than modifying in place, so the
        # result has to be kept for the all-null rows to actually be dropped.
        refdef_out = self.refdef.na.drop('all')
        refdef_out.coalesce(1).write.save(oea.to_url(genfilepath), format='csv', mode='overwrite', header='false', mergeSchema='true')

    def genRefTranslation(self):
        return

    def genOrganization(self):
        for index, school in self.schools.iterrows():
            id = school['SchoolID']
            sourcesystemid = self.sourcesystemid
            externalid = random.randint(10000,99999)
            firstseendatetime = self.startdate
            lastseendatetime = self.enddate
            name = school['SchoolName'] 
            identifier = school['SchoolName']
            reforganizationtypeid = self.refdef.filter(self.refdef['Code']=='School').collect()[0][0]
            parentorganizationid = ''
            self.M365_organization.loc[len(self.M365_organization.index)] = [id,sourcesystemid,externalid,firstseendatetime,lastseendatetime,name,identifier,reforganizationtypeid,parentorganizationid]
        self.writetofile('Organization', self.M365_organization)

    def genSectionSession(self):
        for index, section in self.sections.iterrows():
            id = self.sessionid
            sectionid = section['SectionID']
            sessionid = self.sessionid
            firstseendatetime = self.startdate
            lastseendatetime = self.enddate 
            isactiveinsession = True
            self.M365_sectionsession.loc[len(self.M365_sectionsession.index)] = [id,sectionid,sessionid,firstseendatetime,lastseendatetime,isactiveinsession]
        self.writetofile('SectionSession', self.M365_sectionsession)
    
    def genSession(self):
        id = self.sessionid
        sourcesystemid = self.sourcesystemid 
        externalid = random.randint(100,999)
        firstseendatetime = self.startdate
        lastseendatetime = self.enddate
        name = 'Session I' 
        refsessiontypeid = self.refdef.filter(self.refdef['Code']=='Semester').collect()[0][0]
        refacademicyearid = self.refdef.filter(self.refdef['Code']=='2022').collect()[0][0]
        startdate = self.startdate
        enddate = self.enddate
        parentsessionid = '' 
        self.M365_session.loc[len(self.M365_session.index)] = [id,sourcesystemid,externalid,firstseendatetime,lastseendatetime,name,refsessiontypeid,refacademicyearid,startdate,enddate,parentsessionid]
        self.writetofile('Session', self.M365_session)
    
    def genSection(self):
        for index, section in self.sections.iterrows():
            id = section['SectionID']
            sourcesystemid = self.sourcesystemid
            externalid = ''
            firstseendatetime = self.startdate
            lastseendatetime = self.enddate
            name = section['SectionName']
            organizationid = section['SchoolID']
            courseid = section['CourseID']
            code = name[-3:]
            location = section['SchoolName'] 
            self.M365_section.loc[len(self.M365_section.index)] = [id,sourcesystemid,externalid,firstseendatetime,lastseendatetime,name,organizationid,courseid,code,location]
        self.writetofile('Section', self.M365_section)

    def genSectionGradeLevel(self):
        for index,section in self.sections.iterrows():
            id = self.faker.uuid4()
            sectionid = section['SectionID']
            sectiongradelevel = section['SectionGradeLevel']
            refgradelevelid = self.refdef.filter(self.refdef['Code']==f'{sectiongradelevel}').collect()[0][0]
            firstseendatetime = self.startdate 
            self.M365_sectiongradelevel.loc[len(self.M365_sectiongradelevel.index)] = [id,sectionid,refgradelevelid,firstseendatetime]
        self.writetofile('SectionGradeLevel', self.M365_sectiongradelevel)

    def genSectionSubject(self):
        for index, section in self.sections.iterrows():
            id = self.faker.uuid4()
            sectionid = section['SectionID']
            sectionsubject = section['SectionSubject']
            refacademicsubjectid = self.refdef.filter(self.refdef['Code']==sectionsubject).collect()[0][0]
            firstseendatetime = self.startdate 
            self.M365_sectionsubject.loc[len(self.M365_sectionsubject.index)] = [id,sectionid,refacademicsubjectid,firstseendatetime]
        self.writetofile('SectionSubject', self.M365_sectionsubject)

    def genCourse(self):
        for index, course in self.courses.iterrows():
            id = course['CourseID']
            sourcesystemid = self.sourcesystemid
            externalid = ''
            firstseendatetime = self.startdate
            lastseendatetime = self.enddate
            name = course['CourseName']
            organizationid = course['SchoolID']
            isactiveinsession = True
            code = random.randint(1000,9999)
            academicyearsessionid = self.sessionid
            self.M365_course.loc[len(self.M365_course.index)] = [id,sourcesystemid,externalid,firstseendatetime,lastseendatetime,name,organizationid,isactiveinsession,code,academicyearsessionid]
        self.writetofile('Course', self.M365_course)

    def genCourseSubject(self):
        for index, course in self.courses.iterrows():
            id = self.faker.uuid4()
            courseid = course['CourseID']
            coursesubject = course['CourseSubject']
            refacademicsubjectid = self.refdef.filter(self.refdef['Code']==coursesubject).collect()[0][0]
            firstseendatetime = self.startdate
            self.M365_coursesubject.loc[len(self.M365_coursesubject.index)] = [id,courseid,refacademicsubjectid,firstseendatetime]
        self.writetofile('CourseSubject', self.M365_coursesubject)

    def genCourseGradeLevel(self):
        for index, course in self.courses.iterrows():
            id = self.faker.uuid4()
            courseid = course['CourseID']
            coursegradelevel = course['CourseGradeLevel']
            refgradelevelid = self.refdef.filter(self.refdef['Code']==f'{coursegradelevel}').collect()[0][0]
            firstseendatetime = self.startdate 
            self.M365_coursegradelevel.loc[len(self.M365_coursegradelevel.index)] = [id,courseid,refgradelevelid,firstseendatetime]
        self.writetofile('CourseGradeLevel', self.M365_coursegradelevel)

    def genPersonOrganizationRole(self):
        refroleid = self.refdef.filter(self.refdef['Code']=='Student').collect()[0][0]
        for index, enroll in self.enrollment.iterrows():
            id = self.faker.uuid4()
            sourcesystemid = self.sourcesystemid
            externalid = ''
            firstseendatetime = self.startdate
            lastseendatetime = self.enddate
            organizationid = enroll['SchoolID']
            personid = enroll['StudentID']
            sessionid = self.sessionid
            isactiveinsession = True
            rolestartdate = self.startdate
            roleenddate = self.enddate
            isprimary = ''
            coursegradelevel = enroll['CourseGradeLevel']
            refgradelevelid = self.refdef.filter(self.refdef['Code']==f'{coursegradelevel}').collect()[0][0]
            self.M365_personorganizationrole.loc[len(self.M365_personorganizationrole.index)] = [id,sourcesystemid,externalid,firstseendatetime,lastseendatetime,organizationid,personid,refroleid,sessionid,isactiveinsession,rolestartdate,roleenddate,isprimary,refgradelevelid]
        self.writetofile('PersonOrganizationRole', self.M365_personorganizationrole)

    def genPersonDemographicEthnicity(self):
        for index, student in self.students.iterrows():
            id = self.faker.uuid4()
            personid = student['StudentID']
            if student['HispanicLatino'] == 'True':
                refethnicityid = 'EBD54EE0-4E76-469E-A0D1-27836518E87C'
            else:
                refethnicityid = ''
            firstseendatetime = self.startdate 
            lastseendatetime = self.enddate
            self.M365_persondemographicethnicity.loc[len(self.M365_persondemographicethnicity.index)] = [id,personid,refethnicityid,firstseendatetime,lastseendatetime]
        self.writetofile('PersonDemographicEthnicity', self.M365_persondemographicethnicity)

    def genPersonDemographicPersonFlag(self):
        for index, student in self.students.iterrows():
            id = self.faker.uuid4()
            personid = student['StudentID']
            refpersonflagid = '' 
            firstseendatetime = self.startdate
            lastseendatetime = self.enddate
            self.M365_persondemographicpersonflag.loc[len(self.M365_persondemographicpersonflag.index)] = [id,personid,refpersonflagid,firstseendatetime,lastseendatetime]
        self.writetofile('PersonDemographicPersonFlag', self.M365_persondemographicpersonflag)

    def genPersonDemographicRace(self):
        for index, student in self.students.iterrows():
            id = self.faker.uuid4()
            personid = student['StudentID']
            studentrace = student['Race']
            refraceid = self.refdef.filter(self.refdef['Code']==studentrace).collect()[0][0]
            firstseendatetime = self.startdate
            lastseendatetime = self.enddate
            self.M365_persondemographicrace.loc[len(self.M365_persondemographicrace.index)] = [id,personid,refraceid,firstseendatetime,lastseendatetime]
        self.writetofile('PersonDemographicRace', self.M365_persondemographicrace)
    
    def genPersonDemographic(self):
        for index, student in self.students.iterrows():
            id = self.faker.uuid4()
            personid = student['StudentID']
            firstseendatetime = self.startdate
            lastseendatetime = self.enddate
            if student['Gender'] == 'M':
                refsexid = 'F543B59B-8AC5-49CF-A4F1-8439613F3378'
            elif student['Gender'] == 'F':
                refsexid = '591DF534-C4D9-48D4-A465-B8DFD80C3D05'
            else:
                refsexid = 'FD5A5217-6559-47FE-ABD3-8EF53FC23E85'
            birthdate = student['Birthday']
            birthcity = student['City']
            birthstate = student['State']
            birthcountycode = student['Zipcode']
            self.M365_persondemographic.loc[len(self.M365_persondemographic.index)] = [id,personid,firstseendatetime,lastseendatetime,refsexid,birthdate,birthcity,birthstate,birthcountycode]
        self.writetofile('PersonDemographic', self.M365_persondemographic)

    def genPersonPhoneNumber(self):
        for index, student in self.students.iterrows():
            id = self.faker.uuid4()
            personid = student['StudentID']
            phonenumber = student['Phone']
            priorityorder = ''
            refphonenumbertypeid = '2EF682BF-22F4-4A13-B7E6-2F8ADCCBF3C8' 
            firstseendatetime = self.startdate
            self.M365_personphonenumber.loc[len(self.M365_personphonenumber.index)] = [id,personid,phonenumber,priorityorder,refphonenumbertypeid,firstseendatetime]
        self.writetofile('PersonPhoneNumber', self.M365_personphonenumber)

    def genPersonEmailAddress(self):
        for index, student in self.students.iterrows():
            id = self.faker.uuid4()
            personid = student['StudentID']
            emailaddress = student['Email']
            priorityorder = '' 
            refemailaddresstypeid = 'DB43E1D8-DE3D-4142-A54C-C5B27E43D59F' 
            firstseendatetime = self.startdate
            self.M365_personemailaddress.loc[len(self.M365_personemailaddress.index)] = [id,personid,emailaddress,priorityorder,refemailaddresstypeid,firstseendatetime]
        self.writetofile('PersonEmailAddress', self.M365_personemailaddress)

    def genPersonRelationship(self):
        id = '' 
        personid = '' 
        relatedpersonid = '' 
        refpersonrelationshipid = '' 
        firstseendatetime = '' 
        self.M365_personrelationship.loc[len(self.M365_personrelationship.index)] = [id,personid,relatedpersonid,refpersonrelationshipid,firstseendatetime]
        self.writetofile('PersonRelationship', self.M365_personrelationship)
    
    def genPersonIdentifier(self):
        id = '' 
        personid = '' 
        sourcesystemid = '' 
        refidentifiertypeid = '' 
        identifier = '' 
        firstseendatetime = '' 
        ispresentinsource = '' 
        self.M365_personidentifier.loc[len(self.M365_personidentifier.index)] = [id,personid,sourcesystemid,refidentifiertypeid,identifier,firstseendatetime,ispresentinsource]
        self.writetofile('PersonIdentifier', self.M365_personidentifier)

    def genPerson(self):
        for index, student in self.students.iterrows():
            id = student['StudentID']
            firstseendatetime = self.startdate
            lastseendatetime = self.enddate
            surname = student['LastName']
            givenname = student['FirstName']
            middlename = student['MiddleName']
            preferredsurname = surname
            preferredgivenname = givenname
            preferredmiddlename = middlename
            self.M365_person.loc[len(self.M365_person.index)] = [id,firstseendatetime,lastseendatetime,surname,givenname,middlename,preferredsurname,preferredgivenname,preferredmiddlename]
        self.writetofile('Person', self.M365_person)
        
    def genEnrollment(self):
        for index, enroll in self.enrollment.iterrows():
            id = self.faker.uuid4() 
            sourcesystemid = self.sourcesystemid
            externalid = '' 
            firstseendatetime = self.startdate
            lastseendatetime = self.enddate
            personid = enroll['StudentID']
            sectionid = enroll['SectionID']
            refsectionroleid = '3DA186F2-D4CA-43C2-9EBB-10B0B89EDB87' 
            isactiveinsession = True
            isprimarystaffforsection = False
            entrydate = self.startdate 
            exitdate = self.enddate
            self.M365_enrollment.loc[len(self.M365_enrollment.index)] = [id,sourcesystemid,externalid,firstseendatetime,lastseendatetime,personid,sectionid,refsectionroleid,isactiveinsession,isprimarystaffforsection,entrydate,exitdate]
        self.writetofile('Enrollment',self.M365_enrollment)

    def writetofile(self,filename,dfout):
        """Write one generated table to stage1 as headerless CSV.

        Args:
            filename: Table name; becomes the folder name under M365_gen.
            dfout: pandas DataFrame holding the rows to write.

        The output folder is stamped with self.currentDateTime and is
        overwritten if it already exists.
        """
        target_path = (
            'stage1/Transactional/test_data/v0.1/M365_gen/'
            + filename
            + '/snapshot_batch_data/rundate='
            + self.currentDateTime
        )
        # convert the pandas df to a pyspark df and drop rows where every column is null
        spark_frame = spark.createDataFrame(dfout).na.drop('all')
        # coalesce(1) puts the data in a single partition before writing
        writer = spark_frame.coalesce(1).write
        writer.save(oea.to_url(target_path), format='csv', mode='overwrite', header='false', mergeSchema='true')