In [1]:
#ProgramName: Daily-06-UpdateVERun-Min-Max-DATE-Reporting-Jupyter
#Purpose: Run collection routine for min-max details about system (Quality Assurance)
#Author:  Greg Turmel, Director, Data Governance 
#Date:    2020.08.30 - 2021.06.30
#Errata:  0.1 Improvements can be made to script using for/looping through the databases

import os, sys, argparse, pyodbc, sql, time, datetime
import errno, pathlib2
import sqlalchemy as db

from dotenv import load_dotenv # add this line
import pandas as pd
import numpy as np

from io import StringIO
import csv
import re

load_dotenv() # add this line
user = os.getenv('MySQLeUser')
password = os.getenv('MySQLeUserPass')
host = os.getenv('MySQLeHOST')
db = os.getenv('MySQLeDB')

In [2]:
#Step 1
########################################
#Core Type: Min-Max DATE as DATE
########################################
#First: Ensure PATH variable location exists on your system, if not, adjust here, and create the structure
try:
    os.makedirs(r"C:\Users\e201873842\Documents\Jupyter\Daily\pkg\PyMinMax", exist_ok=True)  
    #succeeds even if directory exists.
except FileExistsError:
    #directory already exists
    pass
########################################
#Variables: Program set up
########################################
#0. Set and specify a file name for each variable 
#1. Establish if prod or test run using csvtype, debugging using Debug variable, and set time variable to track process
#2. Read into dataframe and establish database names, schema names, and table names 
#3. For debugging issues: Write dataframe results into an output csv 
#4. Validate database is available today and then create dynamic sql file(s) for processing
now01 = datetime.datetime.now()
csvtype = 'Base' #csvtype - use 'Test or Base'

#Locate/Relocate the driving xls/csv core-noncore file structure based on your system
#Then: Read the excel file(s) with core and noncore source tables 
#(control input using csvtype variable "base" and "test") 
f1a = pd.read_csv(r'C:\Users\e201873842\Documents\Jupyter\Daily\pkg\config\coreSource'+ csvtype +'.csv', encoding='1252')
f1b = pd.read_csv(r'C:\Users\e201873842\Documents\Jupyter\Daily\pkg\config\noncoreSource'+ csvtype +'.csv', encoding='1252')

#Set Core dataframes
f1c = f1a['DatabaseName']           #(Core) Create dataframe with what is found in f1 dataframe
f1d = f1a['TableName']              #(Core) Create new dataframe with tables found in f1 dataframe
f1g = f1a['SchemaName']             #(Core) Create new dataframe with tables found in f1 dataframe

#Set NONCore dataframes
f1e = f1b['DatabaseName']           #(Non) Create dataframe with what is found in f1 dataframe
f1f = f1b['TableName']              #(Non) Create new dataframe with tables found in f1 dataframe
f1h = f1b['SchemaName']             #(Non) Create new dataframe with tables found in f1 dataframe

#De-Dupe Core dataframe contents
dbListC = f1c.drop_duplicates()     #de-dupe
tblListC = f1d.drop_duplicates()    #de-dupe
schemaListC = f1g.drop_duplicates() #de-dupe

#Sort Core dataframe contents
dbListC = dbListC.sort_values()            #Sort
tblListC = tblListC.sort_values()          #Sort
schemaListC = schemaListC.sort_values()    #Sort

#De-Dupe NONCore dataframe contents
dbListN = f1e.drop_duplicates()     #de-dupe
tblListN = f1f.drop_duplicates()    #de-dupe
schemaListN = f1h.drop_duplicates() #de-dupe

#Sort NONCore dataframe contents
dbListN = dbListN.sort_values()            #Sort
tblListN = tblListN.sort_values()          #Sort
schemaListN = schemaListN.sort_values()    #Sort

#Debug Control
Dbug = 'Off'     #Debugging - use On or Off  
if Dbug == 'On':
    print (dbListC)
    print (dbListN)
    #break

#Next, Set and specify a path variable for the reports: 
#Note: Make sure output location is available for write (specified from above in create dir statement) 
#Set "path" variable used throughout for file creation and data loading 
path = (r'C:\Users\e201873842\Documents\Jupyter\Daily\pkg\PyMinMax')
#Set variable names for output files: 1. debugging dataframes, dynamic sql generated files, and reports to build
file00 = 'dbListC.csv'
file01 = 'dbListN.csv'
file02 = 'tblListC.csv'
file03 = 'tblListN.csv'
file13 = 'schemaListC.csv'
file14 = 'schemaListN.csv'
########################################
#Setup Dynamic query seperation: CORE
#USE 'CD' for core DATE, 'CI' for core INT, USE 'CC' for core CHAR, VARCHAR, NCHAR, NVARCHAR 
########################################
file04 = 'mmDynamicCD.sql'
file05 = 'mmDynamicNCD.sql'
file06 = 'mmReport.csv'
########################################
file10 = 'mmMergeC.csv'
file11 = 'mmMergeN.csv'
file12 = 'mmMerge.csv'
########################################
#Specify a path above for reports: Make sure output location is available for write 
########################################
with open(os.path.join(path, file00), 'w') as fp: 
    pass
with open(os.path.join(path, file01), 'w') as fp: 
    pass
with open(os.path.join(path, file02), 'w') as fp: 
    pass
with open(os.path.join(path, file03), 'w') as fp: 
    pass
with open(os.path.join(path, file04), 'w') as fp: 
    pass
with open(os.path.join(path, file05), 'w') as fp: 
    pass
with open(os.path.join(path, file06), 'w') as fp: 
    pass
with open(os.path.join(path, file10), 'w') as fp: 
    pass
with open(os.path.join(path, file11), 'w') as fp: 
    pass
with open(os.path.join(path, file12), 'w') as fp: 
    pass
with open(os.path.join(path, file13), 'w') as fp: 
    pass
with open(os.path.join(path, file14), 'w') as fp: 
    pass
########################################
#Back up the *dbList, *tblList, *schemaList dataframes from input file for debug
########################################
#dbList files
with open(os.path.join(path, file00), 'a') as f:
    dbListC.to_csv(f, header=None, index=False, line_terminator='\n')
with open(os.path.join(path, file01), 'a') as f:
    dbListN.to_csv(f, header=None, index=False, line_terminator='\n')
#tblList files
with open(os.path.join(path, file02), 'a') as f:
    tblListC.to_csv(f, header=None, index=False, line_terminator='\n')
with open(os.path.join(path, file03), 'a') as f:
    tblListN.to_csv(f, header=None, index=False, line_terminator='\n')
#schemaList files
with open(os.path.join(path, file13), 'a') as f:
    schemaListC.to_csv(f, header=None, index=False, line_terminator='\n')
with open(os.path.join(path, file14), 'a') as f:
    schemaListN.to_csv(f, header=None, index=False, line_terminator='\n')
########################################
#MinMax (DATE) Core - Create Dynamic SQL - begin programming steps
########################################
for y in schemaListC:
    try:
        for x in dbListC:
            try:
                conn = pyodbc.connect('Server=PRODODSSQL;'
                                  'Trusted_Connection=yes;'
                                  'DRIVER={{SQL Server}};'
                                  'Database={0}'.format(x))

                sql = open(r'C:\Users\e201873842\Documents\Jupyter\Daily\pkg\PyMinMax\query\mmCD.sql')
                df = pd.read_sql_query(sql.read(), conn)

                with open(os.path.join(path, file04), 'a') as f:
                    df.to_string(f, header=None, index=False)

                sql.close()
                conn.close()
            finally:
                continue
    finally:
        continue

########################################
#MinMax (DATE) Core Processing of the dynamic SQL file
########################################
# Read into the df the dynamic sql file built above
s = open(r'C:\Users\e201873842\Documents\Jupyter\Daily\pkg\PyMinMax\mmDynamicCD.sql')
sqlFile = s.read()
s.close()

#strip and then split read on';' 
now01 = datetime.datetime.now()
print('Processing Dynamic Min-Max C DATE SQL: ',now01.strftime("%Y-%m-%d %H:%M:%S"))

for sqlCmd in sqlFile.split(';'):
    for y in schemaListC:
        try:
            for x in dbListC:
                try:
                    conn = pyodbc.connect('Server=PRODODSSQL;'
                                  'Trusted_Connection=yes;'
                                  'DRIVER={{SQL Server}};'
                                  'Database={0}'.format(x))
                    cursor = conn.cursor()
                    cursor.execute(sqlCmd)
                    row = cursor.fetchone()
                    if row == None:
                        break
                    df = pd.DataFrame([row])
                    #if df == None:
                        #break
                    with open(os.path.join(path, file06), 'a') as f:
                        df.to_csv(f, header=None, index=False, line_terminator='\n')
                    with open(os.path.join(path, file10), 'a') as f:
                        df.to_csv(f, header=None, sep=',', index=False, encoding='cp1252', line_terminator='\n')
                    with open(os.path.join(path, file12), 'a') as f:
                        df.to_csv(f, header=None, sep=',', index=False, encoding='cp1252', line_terminator='\n')
                    # Close cursor to database
                    cursor.close()
                    # Close connection to database
                    conn.close()
                except:
                    continue
                finally:
                    continue
        finally:
            continue

now01 = datetime.datetime.now()
print('Processing Complete: ',now01.strftime("%Y-%m-%d %H:%M:%S"))
#Print (" ")
########################################
#MinMax (DATE) NonCore - Create Dynamic SQL - Seperating NONcore results from reporting
########################################
for x in dbListN:
    try:
        conn = pyodbc.connect('Server=PRODODSSQL;'
                              'Trusted_Connection=yes;'
                              'DRIVER={{SQL Server}};'
                              'Database={0}'.format(x))

        sql = open(r'C:\Users\e201873842\Documents\Jupyter\Daily\pkg\PyMinMax\query\mmNCD.sql')
        df = pd.read_sql_query(sql.read(), conn)

        with open(os.path.join(path, file05), 'a') as f:
            df.to_string(f, header=None, index=False)

        sql.close()
        conn.close()
    finally:
        continue

########################################
#MinMax (DATE) NonCore Processing - begin processing dynamic SQL
########################################
#Read into the df the dynamic NC sql file built above
s = open(r'C:\Users\e201873842\Documents\Jupyter\Daily\pkg\PyMinMax\mmDynamicNCD.sql')
sqlFile = s.read()
s.close()

#strip and then split read on';' 
now01 = datetime.datetime.now()
print('Processing Dynamic Min-Max NC DATE SQL: ',now01.strftime("%Y-%m-%d %H:%M:%S"))

for sqlCmd in sqlFile.split(';'):
    for x in dbListN:
        try:
            conn = pyodbc.connect('Server=PRODODSSQL;'
                                  'Trusted_Connection=yes;'
                                  'DRIVER={{SQL Server}};'
                                  'Database={0}'.format(x))
            cursor = conn.cursor()
            cursor.execute(sqlCmd)
            row = cursor.fetchone()
            if row == None:
                break
            df = pd.DataFrame([row])
            #if df == None:
                #break
            with open(os.path.join(path, file06), 'a') as f:
                df.to_csv(f, header=None, index=False, line_terminator='\n')
            with open(os.path.join(path, file11), 'a') as f:
                df.to_csv(f, header=None, sep=',', index=False, encoding='cp1252', line_terminator='\n')
            with open(os.path.join(path, file12), 'a') as f:
                df.to_csv(f, header=None, sep=',', index=False, encoding='cp1252', line_terminator='\n')
            #Close cursor to database
            cursor.close()
            #Close connection to database
            conn.close()
        except:
            continue
        finally:
            continue

########################################
now01 = datetime.datetime.now()
print('Processing Complete: ',now01.strftime("%Y-%m-%d %H:%M:%S"))
########################################
print('**** Connection Closed - Report Generated - Program Completed ****')

Processing Dynamic Min-Max C DATE SQL:  2021-05-05 07:32:43
Processing Complete:  2021-05-05 07:32:54
Processing Dynamic Min-Max NC DATE SQL:  2021-05-05 07:33:07
Processing Complete:  2021-05-05 08:05:55
**** Connection Closed - Report Generated - Program Completed ****


In [3]:
#Core Type 
#Variables: 
now = datetime.datetime.now()
csvtype = 'Base' #csvtype - use 'Test or Base'
Dbug = 'Off'     #Debugging - use On or Off  

########################################
#Dynamic query seperation: CORE
#USE 'CI' for INT, USE 'CC' for INT, USE 'CV' for VARINT, USE 'CNV' for NVARINT, USE 'CN' for NINT 
########################################
file04 = 'mmDynamicCI.sql'
file05 = 'mmDynamicNCI.sql'
file06 = 'mmReport.csv'
########################################
file12 = 'mmMerge.csv'
########################################

#Creating a report file to load todays data into at the specified location 
with open(os.path.join(path, file04), 'w') as fp: 
    pass
with open(os.path.join(path, file05), 'w') as fp: 
    pass

#Min (INT) Core - Create Dynamic SQL
for x in dbListC:
    try:
        conn = pyodbc.connect('Server=PRODODSSQL;'
                              'Trusted_Connection=yes;'
                              'DRIVER={{SQL Server}};'
                              'Database={0}'.format(x))

        sql = open(r'C:\Users\e201873842\Documents\Jupyter\Daily\pkg\PyMinMax\query\mmCI.sql')
        df = pd.read_sql_query(sql.read(), conn)

        with open(os.path.join(path, file04), 'a') as f:
            df.to_string(f, header=None, index=False)

        sql.close()
        conn.close()
    finally:
        continue

########################################
#MinMax (INT) Core Processing 
########################################
#Read into the df the dynamic sql file built above
s = open(r'C:\Users\e201873842\Documents\Jupyter\Daily\pkg\PyMinMax\mmDynamicCI.sql')
sqlFile = s.read()
s.close()

#strip and then split read on';' 
now01 = datetime.datetime.now()
print('Processing Dynamic Min-Max C INT SQL: ',now01.strftime("%Y-%m-%d %H:%M:%S"))

for sqlCmd in sqlFile.split(';'):
    for x in dbListC:
        try:
            conn = pyodbc.connect('Server=PRODODSSQL;'
                                  'Trusted_Connection=yes;'
                                  'DRIVER={{SQL Server}};'
                                  'Database={0}'.format(x))
            cursor = conn.cursor()
            cursor.execute(sqlCmd)
            row = cursor.fetchone()
            if row == None:
                break
            df = pd.DataFrame([row])
            #if df == None:
                #break
            with open(os.path.join(path, file06), 'a') as f:
                df.to_csv(f, header=None, index=False, line_terminator='\n')
            with open(os.path.join(path, file10), 'a') as f:
                df.to_csv(f, header=None, sep=',', index=False, encoding='cp1252', line_terminator='\n')
            with open(os.path.join(path, file12), 'a') as f:
                df.to_csv(f, header=None, sep=',', index=False, encoding='cp1252', line_terminator='\n')
            #Close cursor to database
            cursor.close()
            #Close connection to database
            conn.close()
        except:
            continue
        finally:
            continue

now01 = datetime.datetime.now()
print('Processing Complete: ',now01.strftime("%Y-%m-%d %H:%M:%S"))
#Print (" ")
########################################
#MinMax (INT) NonCore - Create Dynamic SQL 
########################################
for x in dbListN:
    try:
        conn = pyodbc.connect('Server=PRODODSSQL;'
                              'Trusted_Connection=yes;'
                              'DRIVER={{SQL Server}};'
                              'Database={0}'.format(x))

        sql = open(r'C:\Users\e201873842\Documents\Jupyter\Daily\pkg\PyMinMax\query\mmNCI.sql')
        df = pd.read_sql_query(sql.read(), conn)

        with open(os.path.join(path, file05), 'a') as f:
            df.to_string(f, header=None, index=False)

        sql.close()
        conn.close()
    finally:
        continue

########################################
#MinMax (INT) Non-Core Processing 
########################################
#Read into the df the dynamic sql file built above
s = open(r'C:\Users\e201873842\Documents\Jupyter\Daily\pkg\PyMinMax\mmDynamicNCI.sql')
sqlFile = s.read()
s.close()

#strip and then split read on';' 
now01 = datetime.datetime.now()
print('Processing Dynamic Min-Max NC INT SQL: ',now01.strftime("%Y-%m-%d %H:%M:%S"))

for sqlCmd in sqlFile.split(';'):
    for x in dbListN:
        try:
            conn = pyodbc.connect('Server=PRODODSSQL;'
                                  'Trusted_Connection=yes;'
                                  'DRIVER={{SQL Server}};'
                                  'Database={0}'.format(x))
            cursor = conn.cursor()
            cursor.execute(sqlCmd)
            row = cursor.fetchone()
            if row == None:
                break
            df = pd.DataFrame([row])
            #if df == None:
                #break
            with open(os.path.join(path, file06), 'a') as f:
                df.to_csv(f, header=None, index=False, line_terminator='\n')
            with open(os.path.join(path, file11), 'a') as f:
                df.to_csv(f, header=None, sep=',', index=False, encoding='cp1252', line_terminator='\n')
            with open(os.path.join(path, file12), 'a') as f:
                df.to_csv(f, header=None, sep=',', index=False, encoding='cp1252', line_terminator='\n')
            #Close cursor to database
            cursor.close()
            #Close connection to database
            conn.close()
        except:
            continue
        finally:
            continue

########################################
now01 = datetime.datetime.now()
print('Processing Complete: ',now01.strftime("%Y-%m-%d %H:%M:%S"))
########################################
print('**** Connection Closed - Report Generated - Program Completed ****')

Processing Dynamic Min-Max C INT SQL:  2021-05-05 10:44:37
Processing Complete:  2021-05-05 10:45:02
Processing Dynamic Min-Max NC INT SQL:  2021-05-05 10:45:24
Processing Complete:  2021-05-05 11:01:51
**** Connection Closed - Report Generated - Program Completed ****


In [4]:
#Core Type 
#Variables: 
########################################
#Dynamic query seperation: CORE
#USE 'CI' for INT, USE 'CC' for CHAR, USE 'CV' for VARCHAR, USE 'CNV' for NVARCHAR, USE 'CN' for NCHAR 
########################################
file04 = 'mmDynamicCC.sql'
file05 = 'mmDynamicNCC.sql'
file06 = 'mmReport.csv'
########################################
file12 = 'mmMerge.csv'
########################################

#Creating a report file to load todays data into at the specified location 
with open(os.path.join(path, file04), 'w') as fp: 
    pass
with open(os.path.join(path, file05), 'w') as fp: 
    pass
########################################
#MinMax (CHAR) Core - Create Dynamic SQL
########################################
for x in dbListC:
    try:
        conn = pyodbc.connect('Server=PRODODSSQL;'
                              'Trusted_Connection=yes;'
                              'DRIVER={{SQL Server}};'
                              'Database={0}'.format(x))

        sql = open(r'C:\Users\e201873842\Documents\Jupyter\Daily\pkg\PyMinMax\query\mmCC.sql')
        df = pd.read_sql_query(sql.read(), conn)

        with open(os.path.join(path, file04), 'a') as f:
            df.to_string(f, header=None, index=False)

        sql.close()
        conn.close()
    finally:
        continue

########################################
#MinMax (CHAR) Core Processing 
########################################
#Read into the df the dynamic sql file built above
s = open(r'C:\Users\e201873842\Documents\Jupyter\Daily\pkg\PyMinMax\mmDynamicCC.sql')
sqlFile = s.read()
s.close()

#strip and then split read on';' 
now01 = datetime.datetime.now()
print('Processing Dynamic Min-Max C CHAR SQL: ',now01.strftime("%Y-%m-%d %H:%M:%S"))

for sqlCmd in sqlFile.split(';'):
    for x in dbListC:
        try:
            conn = pyodbc.connect('Server=PRODODSSQL;'
                                  'Trusted_Connection=yes;'
                                  'DRIVER={{SQL Server}};'
                                  'Database={0}'.format(x))
            cursor = conn.cursor()
            cursor.execute(sqlCmd)
            row = cursor.fetchone()
            if row == None:
                break
            df = pd.DataFrame([row])
            #if df == None:
                #break
            with open(os.path.join(path, file06), 'a') as f:
                df.to_csv(f, header=None, index=False, line_terminator='\n')
            with open(os.path.join(path, file10), 'a') as f:
                df.to_csv(f, header=None, sep=',', index=False, encoding='cp1252', line_terminator='\n')
            with open(os.path.join(path, file12), 'a') as f:
                df.to_csv(f, header=None, sep=',', index=False, encoding='cp1252', line_terminator='\n')
            #Close cursor to database
            cursor.close()
            #Close connection to database
            conn.close()
        except:
            continue
        finally:
            continue

now01 = datetime.datetime.now()
print('Processing Dynamic Min-Max C CHAR SQL Complete: ',now01.strftime("%Y-%m-%d %H:%M:%S"))
#Print (" ")
########################################
#MinMax (CHAR) Core - Create Dynamic SQL 
########################################
for x in dbListN:
    try:
        conn = pyodbc.connect('Server=PRODODSSQL;'
                              'Trusted_Connection=yes;'
                              'DRIVER={{SQL Server}};'
                              'Database={0}'.format(x))

        sql = open(r'C:\Users\e201873842\Documents\Jupyter\Daily\pkg\PyMinMax\query\mmNCC.sql')
        df = pd.read_sql_query(sql.read(), conn)

        with open(os.path.join(path, file05), 'a') as f:
            df.to_string(f, header=None, index=False)

        sql.close()
        conn.close()
    finally:
        continue

########################################
#MinMax (CHAR) NonCore Processing 
########################################
#Read into the df the dynamic sql file built above
s = open(r'C:\Users\e201873842\Documents\Jupyter\Daily\pkg\PyMinMax\mmDynamicNCC.sql')
sqlFile = s.read()
s.close()

#strip and then split read on';' 
now01 = datetime.datetime.now()
print('Processing Dynamic Min-Max SQL: ',now01.strftime("%Y-%m-%d %H:%M:%S"))

for sqlCmd in sqlFile.split(';'):
    for x in dbListN:
        try:
            conn = pyodbc.connect('Server=PRODODSSQL;'
                                  'Trusted_Connection=yes;'
                                  'DRIVER={{SQL Server}};'
                                  'Database={0}'.format(x))
            cursor = conn.cursor()
            cursor.execute(sqlCmd)
            row = cursor.fetchone()
            if row == None:
                break
            df = pd.DataFrame([row])
            #if df == None:
                #break
            with open(os.path.join(path, file06), 'a') as f:
                df.to_csv(f, header=None, index=False, line_terminator='\n')
            with open(os.path.join(path, file11), 'a') as f:
                df.to_csv(f, header=None, sep=',', index=False, encoding='cp1252', line_terminator='\n')
            with open(os.path.join(path, file12), 'a') as f:
                df.to_csv(f, header=None, sep=',', index=False, encoding='cp1252', line_terminator='\n')
            #Close cursor to database
            cursor.close()
            #Close connection to database
            conn.close()
        except:
            continue
        finally:
            continue

########################################
now01 = datetime.datetime.now()
print('Processing Dynamic Min-Max NC CHAR SQL Complete: ',now01.strftime("%Y-%m-%d %H:%M:%S"))
########################################
print('**** Connection Closed - Report Generated - Program Completed ****')

Processing Dynamic Min-Max C CHAR SQL:  2021-05-05 11:01:57
Processing Dynamic Min-Max C CHAR SQL Complete:  2021-05-05 11:01:58
Processing Dynamic Min-Max SQL:  2021-05-05 11:02:14
Processing Dynamic Min-Max NC CHAR SQL Complete:  2021-05-05 11:02:14
**** Connection Closed - Report Generated - Program Completed ****


In [5]:
print('**** Min-Max Date data generated for reporting - This Jupyter Notebook Program has Completed ****')

**** Min-Max Date data generated for reporting - This Jupyter Notebook Program has Completed ****


In [6]:
s01 = open(r'C:\Users\e201873842\Documents\Jupyter\Daily\pkg\PyMinMax\mmReport.csv')
sqlFile15 = s01.read()
s01.close()

#with open(os.path.join(path, file06), 'a') as f:
    #df.to_csv(f, header=None, index=False, line_terminator='\n')
 

In [7]:
#ALL datasets - CORE and NON
df = pd.read_csv(r'C:\Users\e201873842\Documents\Jupyter\Daily\pkg\PyMinMax\mmReportwHeaders.csv')
df1c=df[df.MaxDate == 2019]
f1c.head()

#b = df[(df['MaxDate'] > 2000) & (df['MaxDate'] < 2018)]
#df.query('2000 < MaxDate < 2019')
mask = df['MaxDate'].isin([2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021])
df.head()

Unnamed: 0,MinDate,MaxDate,DatabaseName,SchemaName,TableName,ColumnName
0,201,2019,ApplicationData,dbo,AcademicContract,InitConfDate
1,2020,2021,ApplicationData,dbo,AcademicContract,InitConfDate
2,201,2019,ApplicationData,dbo,AcademicContract,InitConfDate
3,2020,2021,ApplicationData,dbo,AcademicContract,InitConfDate
4,2018,2019,ApplicationData,dbo,AcademicContract,LastModifiedDate


In [8]:
#ALL datasets - CORE and NON
df = pd.read_csv(r'C:\Users\e201873842\Documents\Jupyter\Daily\pkg\PyMinMax\mmReportwHeaders.csv')

df1a=df.groupby(['DatabaseName','SchemaName','TableName'],as_index=False) ['MaxDate'].max()
df1a.to_excel(r'C:\Users\e201873842\Documents\Jupyter\Daily\pkg\PyMinMax\Reports\mm1ReportMaxAI.xlsx')

df1a=df1a.groupby(['DatabaseName','SchemaName','TableName'])['MaxDate'].max()
df1a.to_excel(r'C:\Users\e201873842\Documents\Jupyter\Daily\pkg\PyMinMax\Reports\mm1ReportMaxAA.xlsx')

In [9]:
#ALL datasets - CORE and NON
df = pd.read_csv(r'C:\Users\e201873842\Documents\Jupyter\Daily\pkg\PyMinMax\mmReportwHeaders.csv')

df1a=df.groupby(['DatabaseName','SchemaName','TableName','ColumnName'],as_index=False) ['MaxDate'].max()
df1a=df1a.groupby(['DatabaseName','SchemaName','TableName'])['MaxDate'].max()
df1a.to_excel(r'C:\Users\e201873842\Documents\Jupyter\Daily\pkg\PyMinMax\Reports\mm1ReportMaxA.xlsx')

df1b=df.groupby(['DatabaseName','SchemaName','TableName','ColumnName'],as_index=False)['MaxDate'].max()
df1b=df.groupby(['DatabaseName','SchemaName','TableName'],as_index=False)['MaxDate'].max()
df1b.to_excel(r'C:\Users\e201873842\Documents\Jupyter\Daily\pkg\PyMinMax\Reports\mm1ReportMaxB.xlsx')


In [10]:
df1c=df[df.MaxDate == 2019]
df1c=df1c.groupby(['DatabaseName','SchemaName','TableName'],as_index=False)['MaxDate'].max()
df1c.to_excel(r'C:\Users\e201873842\Documents\Jupyter\Daily\pkg\PyMinMax\Reports\mm1ReportMaxC2019.xlsx')

df1d=df[df['MaxDate']==2018]
df1d=df1d.groupby(['DatabaseName','SchemaName','TableName'],as_index=False)['MaxDate'].max()
df1d.to_excel(r'C:\Users\e201873842\Documents\Jupyter\Daily\pkg\PyMinMax\Reports\mm1ReportMaxD2018.xlsx')

df1e=df[df['MaxDate'] != 2017]
df1e=df1e.groupby(['DatabaseName','SchemaName','TableName'],as_index=False)['MaxDate'].max()
df1e.to_excel(r'C:\Users\e201873842\Documents\Jupyter\Daily\pkg\PyMinMax\Reports\mm1ReportMaxE2017.xlsx')


In [11]:
#Only CORE
dfc = pd.read_csv(r'C:\Users\e201873842\Documents\Jupyter\Daily\pkg\PyMinMax\mmReportCwHeaders.csv')
dfc2a=dfc.groupby(['DatabaseName','SchemaName','TableName'])['MaxDate'].max()
dfc2a.to_excel(r'C:\Users\e201873842\Documents\Jupyter\Daily\pkg\PyMinMax\Reports\mm2ReportMaxA2.xlsx')

dfc2b=dfc.groupby(['DatabaseName','SchemaName','TableName'],as_index=False)['MaxDate'].max()
dfc2b.to_excel(r'C:\Users\e201873842\Documents\Jupyter\Daily\pkg\PyMinMax\Reports\mm2ReportMaxB2.xlsx')

In [12]:
#Only NON-CORE
dfn = pd.read_csv(r'C:\Users\e201873842\Documents\Jupyter\Daily\pkg\PyMinMax\mmReportNwHeaders.csv')
dfn3a=dfn.groupby(['DatabaseName','SchemaName','TableName'])['MaxDate'].max()
dfn3a.to_excel(r'C:\Users\e201873842\Documents\Jupyter\Daily\pkg\PyMinMax\Reports\mm3ReportMaxA3.xlsx')

dfn3b=dfn.groupby(['DatabaseName','SchemaName','TableName'],as_index=False)['MaxDate'].max()
dfn3b.to_excel(r'C:\Users\e201873842\Documents\Jupyter\Daily\pkg\PyMinMax\Reports\mm3ReportMaxB3.xlsx')

In [13]:
print('**** Min-Max Date Report Generated - This Jupyter Notebook Program has Completed ****')

**** Min-Max Date Report Generated - This Jupyter Notebook Program has Completed ****


In [14]:
s = open(r'C:\Users\e201873842\Documents\Jupyter\Daily\pkg\PyMinMax\mmMergeC.csv','r').read()

chars = ('$','%','^','*','(',')','\'') # etc
for c in chars:
  s = ' '.join( s.split(c) ).lstrip()

out_file = open(r'C:\Users\e201873842\Documents\Jupyter\Daily\pkg\PyMinMax\mmMergeC_temp.csv','w')
out_file.write(s)
out_file.close()

s = open(r'C:\Users\e201873842\Documents\Jupyter\Daily\pkg\PyMinMax\mmMergeC_temp.csv','r').read()

chars = ('.') # etc
for c in chars:
  s = ','.join( s.split(c) ).lstrip()

out_file = open(r'C:\Users\e201873842\Documents\Jupyter\Daily\pkg\PyMinMax\mmMergeC_temp2.csv','w')
out_file.write(s)
out_file.close()

df = pd.read_csv(r'C:\Users\e201873842\Documents\Jupyter\Daily\pkg\PyMinMax\mmMergeC_temp2.csv')
with open(r'C:\Users\e201873842\Documents\Jupyter\Daily\pkg\PyMinMax\mmMergeC_new.csv', 'w') as f:
    df.to_csv(f, header=f.tell()==0, sep=',', index=False, encoding='cp1252', line_terminator='\n')

now01 = datetime.datetime.now()
print('DataPrep C Complete: ',now01.strftime("%Y-%m-%d %H:%M:%S"))


DataPrep C Complete:  2021-05-05 11:02:28


In [15]:
s = open(r'C:\Users\e201873842\Documents\Jupyter\Daily\pkg\PyMinMax\mmMergeN.csv','r').read()

chars = ('$','%','^','*','(',')','\'') # etc
for c in chars:
  s = ' '.join( s.split(c) ).lstrip()

out_file = open(r'C:\Users\e201873842\Documents\Jupyter\Daily\pkg\PyMinMax\mmMergeN_temp.csv','w')
out_file.write(s)
out_file.close()

s = open(r'C:\Users\e201873842\Documents\Jupyter\Daily\pkg\PyMinMax\mmMergeN_temp.csv','r').read()

chars = ('.') # etc
for c in chars:
  s = ','.join( s.split(c) ).lstrip()

out_file = open(r'C:\Users\e201873842\Documents\Jupyter\Daily\pkg\PyMinMax\mmMergeN_temp2.csv','w')
out_file.write(s)
out_file.close()


In [16]:
  
# reading data from a csv file 'Data.csv'
with open(r'C:\Users\e201873842\Documents\Jupyter\Daily\pkg\PyMinMax\mmMergeN_temp2.csv', newline='') as file:
    
    reader = csv.reader(file, delimiter = ',')
      
    # store the headers in a separate variable,
    # move the reader object to point on the next row
    headings = next(reader)
      
    # output list to store all rows
    Output = []
    for row in reader:
        Output.append(row[:])
  
for row_num, rows in enumerate(Output):
    print('data in row number {} is {}'.format(row_num+1, rows))
  
print('headers were: ', headings)

data in row number 1 is ['  MINDATE , None,  MAXDATE , None,  ApplicationData,SSIS_RunTime,RUN_PACKAGE_OVERRIDES,WindowBegin  ']
data in row number 2 is ['  MINDATE , 1900,  MAXDATE , 2021,  ApplicationData,SSIS_RunTime,RUN,WindowBegin  ']
data in row number 4 is ['  MINDATE , 2020,  MAXDATE , 2021,  ApplicationData,StudentRegistration,FACT_StudentCreditsEarned,UpdateDateTime  ']
data in row number 5 is ['  MINDATE , 2020,  MAXDATE , 2021,  ApplicationData,StudentRegistration,FACT_StudentCoursesTaken,UpdateDateTime  ']
data in row number 6 is ['  MINDATE , 2020,  MAXDATE , 2021,  ApplicationData,StudentRegistration,FACT_StudentAcademies,UpdateDateTime  ']
data in row number 7 is ['  MINDATE , 2017,  MAXDATE , 2018,  ApplicationData,StudentRegistration,DIM_CoursePathways_12202018,UpdateDateTime  ']
data in row number 8 is ['  MINDATE , 2017,  MAXDATE , 2021,  ApplicationData,StudentRegistration,DIM_CoursePathways,UpdateDateTime  ']
data in row number 9 is ['  MINDATE , 2016,  MAXDATE , 

data in row number 565 is ['  MINDATE , 2000,  MAXDATE , 2011,  Assessment,dbo,STU_GWY10_DOMN_SCR,GWY10_TEST_DT  ']
data in row number 566 is ['  MINDATE , 2007,  MAXDATE , 2011,  Assessment,dbo,STU_GWA8_TEST_SCR_STG,GWA8_TEST_DT  ']
data in row number 567 is ['  MINDATE , 2007,  MAXDATE , 2011,  Assessment,dbo,STU_GWA8_TEST_SCR,GWA8_TEST_DT  ']
data in row number 568 is ['  MINDATE , 2007,  MAXDATE , 2011,  Assessment,dbo,STU_GWA8_DOMN_SCR_STG,GWA8_TEST_DT  ']
data in row number 569 is ['  MINDATE , 2007,  MAXDATE , 2011,  Assessment,dbo,STU_GWA8_DOMN_SCR,GWA8_TEST_DT  ']
data in row number 570 is ['  MINDATE , 2007,  MAXDATE , 2011,  Assessment,dbo,STU_GWA5_TEST_SCR_STG,GWA5_TEST_DT  ']
data in row number 571 is ['  MINDATE , None,  MAXDATE , None,  Assessment,dbo,PS_EMPLOYEES,GVT_WGI_DUE_DATE  ']
data in row number 572 is ['  MINDATE , None,  MAXDATE , None,  Assessment,dbo,PS_EMPLOYEES,GVT_SUPV_PROB_DT  ']
data in row number 573 is ['  MINDATE , None,  MAXDATE , None,  Assessment,d

data in row number 1513 is ['  MINDATE , 2020,  MAXDATE , 2021,  GSDR,GBStaging,D2L_SCORE_Import,ScoreDate  ']
data in row number 1514 is ['  MINDATE , 2016,  MAXDATE , 2020,  GSDR,GBStaging,D2L_SCORE_Import,ScoreDate  ']
data in row number 1515 is ['  MINDATE , 2020,  MAXDATE , 2020,  GSDR,dbo,IOWA_CoGAT_Report,RUNDATE  ']
data in row number 1516 is ['  MINDATE , 2020,  MAXDATE , 2020,  GSDR,dbo,IOWA_CoGAT_Report,RUNDATE  ']
data in row number 1517 is ['  MINDATE , 2020,  MAXDATE , 2020,  GSDR,dbo,IOWA_CoGAT_Report,RUNDATE  ']
data in row number 1518 is ['  MINDATE , 2016,  MAXDATE , 2017,  GSDR,GEMS,NEXT_SCHOOL_Audit,run_datetime  ']
data in row number 1519 is ['  MINDATE , 2016,  MAXDATE , 2017,  GSDR,GEMS,NEXT_SCHOOL_Audit,run_datetime  ']
data in row number 1520 is ['  MINDATE , None,  MAXDATE , None,  GSDR,essa,OverrideTeacherSections,RevisionCount  ']
data in row number 1521 is ['  MINDATE , None,  MAXDATE , None,  GSDR,essa,OverrideTeacherSections,RevisionCount  ']
data in row 

data in row number 1863 is ['  MINDATE , 2019,  MAXDATE , 2019,  GSDR,essa,OverrideTeachers,DateCreated  ']
data in row number 1864 is ['  MINDATE , None,  MAXDATE , None,  GSDR,essa,OverrideStudentSections,DateCreated  ']
data in row number 1865 is ['  MINDATE , None,  MAXDATE , None,  GSDR,essa,OverrideStudentSections,DateCreated  ']
data in row number 1866 is ['  MINDATE , None,  MAXDATE , None,  GSDR,essa,OverrideStudents,DateCreated  ']
data in row number 1867 is ['  MINDATE , None,  MAXDATE , None,  GSDR,essa,OverrideStudents,DateCreated  ']
data in row number 1868 is ['  MINDATE , 2010,  MAXDATE , 2013,  GSDR,dbo,SFN_Export,DateBalanceLastUpdated  ']
data in row number 1869 is ['  MINDATE , 2010,  MAXDATE , 2013,  GSDR,dbo,SFN_Export,DateBalanceLastUpdated  ']
data in row number 1870 is ['  MINDATE , 2020,  MAXDATE , 2021,  GSDR,dbo,Patrons_For_Datamove,DateBalanceLastUpdated  ']
data in row number 1871 is ['  MINDATE , 2019,  MAXDATE , 2020,  GSDR,dbo,Patrons_For_Datamove,DateB

data in row number 2862 is ['  MINDATE , 2019,  MAXDATE , 2020,  Predictive_Analytics,ABCETL,StuSchAttSumPrev,LatestDt  ']
data in row number 2863 is ['  MINDATE , 1988,  MAXDATE , 2011,  Predictive_Analytics,PAWORK2,Student_Outcome,LatestDate9thGrade  ']
data in row number 2864 is ['  MINDATE , 1988,  MAXDATE , 2011,  Predictive_Analytics,PAWORK2,Student_Graduation,LatestDate9thGrade  ']
data in row number 2865 is ['  MINDATE , 2005,  MAXDATE , 2010,  Predictive_Analytics,PA3,Student_Outcome,LatestDate9thGrade  ']
data in row number 2866 is ['  MINDATE , 2005,  MAXDATE , 2010,  Predictive_Analytics,PA3,Student_Graduation,LatestDate9thGrade  ']
data in row number 2867 is ['  MINDATE , 2002,  MAXDATE , 2013,  Predictive_Analytics,PAWORK3,Student_Attendance_Dates,LatestAttendanceDate  ']
data in row number 2868 is ['  MINDATE , 2001,  MAXDATE , 2013,  Predictive_Analytics,PAWORK2,Student_Outcome,LatestAttendanceDate  ']
data in row number 2869 is ['  MINDATE , 2001,  MAXDATE , 2013,  Pre

data in row number 3342 is ['  MINDATE ,  2018 ,  MAXDATE ,  2021 ,  ApplicationData,StudentProfile,FACT_StudentTrendingGrades,SchoolYear  ']
data in row number 3343 is ['  MINDATE ,  2018 ,  MAXDATE ,  2018 ,  ApplicationData,StudentProfile,FACT_StudentTrendingGrades_20180101,SchoolYear  ']
data in row number 3344 is ['  MINDATE ,  2016 ,  MAXDATE ,  2018 ,  ApplicationData,StudentProfile,FACT_StudentTrendingGrades_20180328,SchoolYear  ']
data in row number 3345 is ['  MINDATE ,  2016 ,  MAXDATE ,  2021 ,  ApplicationData,StudentProfile,MAP_EducatorStudentCourse,SchoolYear  ']
data in row number 3346 is ['  MINDATE ,  2021 ,  MAXDATE ,  2021 ,  ApplicationData,StudentRegistration,Defaults,SchoolYear  ']
data in row number 3347 is ['  MINDATE ,  2015 ,  MAXDATE ,  2919 ,  ApplicationData,StudentRegistration,FACT_StudentCoursesTaken,SchoolYear  ']
data in row number 3348 is ['  MINDATE , None,  MAXDATE , None,  ApplicationData,StudentRegistration,OpenHouse,SchoolYear  ']
data in row num

data in row number 4212 is ['  MINDATE ,  2005 ,  MAXDATE ,  2016 ,  GSDR,GEMS,elig_rosters_lastupdate,DT_ADDED  ']
data in row number 4213 is ['  MINDATE ,  2005 ,  MAXDATE ,  2016 ,  GSDR,GEMS,elig_rosters_lastupdate,DT_ADDED  ']
data in row number 4214 is ['  MINDATE , None,  MAXDATE , None,  GSDR,GEMS,elig_rosters_nodups,dt_added  ']
data in row number 4215 is ['  MINDATE , None,  MAXDATE , None,  GSDR,GEMS,elig_rosters_nodups,dt_added  ']
data in row number 4216 is ['  MINDATE ,  2005 ,  MAXDATE ,  2018 ,  GSDR,GEMS,elig_rosters_sy1718_aftersummercalc,DT_ADDED  ']
data in row number 4217 is ['  MINDATE ,  2005 ,  MAXDATE ,  2018 ,  GSDR,GEMS,elig_rosters_sy1718_aftersummercalc,DT_ADDED  ']
data in row number 4218 is ['  MINDATE ,  2005 ,  MAXDATE ,  2019 ,  GSDR,GEMS,elig_rosters_sy1819_aftersummercalc,DT_ADDED  ']
data in row number 4219 is ['  MINDATE ,  2005 ,  MAXDATE ,  2019 ,  GSDR,GEMS,elig_rosters_sy1819_aftersummercalc,DT_ADDED  ']
data in row number 4220 is ['  MINDATE ,

data in row number 4645 is ['  MINDATE ,  0 ,  MAXDATE ,  2021 ,  GSDR,GEMS,PS_EMPLOYEE,REHIRE_DATE  ']
data in row number 4646 is ['  MINDATE ,  0 ,  MAXDATE ,  2020 ,  GSDR,GEMS,PS_EMPLOYEE,REHIRE_DATE  ']
data in row number 4647 is ['  MINDATE ,  0 ,  MAXDATE ,  2017 ,  GSDR,GEMS,PS_EMPLOYEE_JIC_20170206,REHIRE_DATE  ']
data in row number 4648 is ['  MINDATE ,  0 ,  MAXDATE ,  2017 ,  GSDR,GEMS,PS_EMPLOYEE_JIC_20170206,REHIRE_DATE  ']
data in row number 4649 is ['  MINDATE ,  0 ,  MAXDATE ,  2021 ,  GSDR,GEMS,SASI_UATR,REQUESTDT  ']
data in row number 4650 is ['  MINDATE ,  0 ,  MAXDATE ,  2020 ,  GSDR,GEMS,SASI_UATR,REQUESTDT  ']
data in row number 4651 is ['  MINDATE ,  0 ,  MAXDATE ,  2017 ,  GSDR,GEMS,SASI_UATR_NEXT,REQUESTDT  ']
data in row number 4652 is ['  MINDATE ,  0 ,  MAXDATE ,  2017 ,  GSDR,GEMS,SASI_UATR_NEXT,REQUESTDT  ']
data in row number 4653 is ['  MINDATE ,  0 ,  MAXDATE ,  2021 ,  GSDR,GEMS,DSCP_PANEL,RESTITUTION_DT  ']
data in row number 4654 is ['  MINDATE ,  

data in row number 5758 is ['  MINDATE ,  0 ,  MAXDATE ,  2020 ,  GSDR_Temp,GEMS,TITLE1_SWP,EXITDATE  ']
data in row number 5759 is ['  MINDATE , None,  MAXDATE , None,  GSDR_Temp,GEMS,TITLE1_TAS,EXITDATE  ']
data in row number 5760 is ['  MINDATE , None,  MAXDATE , None,  GSDR_Temp,GEMS,TITLE1_TAS,EXITDATE  ']
data in row number 5761 is ['  MINDATE ,  0 ,  MAXDATE ,  0 ,  GSDR_Temp,GEMS,SASI_A504,EXTDATE2  ']
data in row number 5762 is ['  MINDATE ,  0 ,  MAXDATE ,  0 ,  GSDR_Temp,GEMS,SASI_A504,EXTDATE2  ']
data in row number 5763 is ['  MINDATE ,  0 ,  MAXDATE ,  0 ,  GSDR_Temp,GEMS,SASI_A504,EXTDATE3  ']
data in row number 5764 is ['  MINDATE ,  0 ,  MAXDATE ,  0 ,  GSDR_Temp,GEMS,SASI_A504,EXTDATE3  ']
data in row number 5765 is ['  MINDATE ,  0 ,  MAXDATE ,  0 ,  GSDR_Temp,GEMS,SASI_A504,EXTDATE4  ']
data in row number 5766 is ['  MINDATE ,  0 ,  MAXDATE ,  0 ,  GSDR_Temp,GEMS,SASI_A504,EXTDATE4  ']
data in row number 5767 is ['  MINDATE ,  0 ,  MAXDATE ,  0 ,  GSDR_Temp,GEMS,SAS

data in row number 5936 is ['  MINDATE ,  1988 ,  MAXDATE ,  2019 ,  GSDR_Temp,StudentProfile,FACT_AttendanceByType,NK2_SchoolYear  ']
data in row number 5937 is ['  MINDATE ,  2001 ,  MAXDATE ,  2020 ,  GSDR_Temp,GEMS,NOSHOWS,NOSHOW_DATE  ']
data in row number 5938 is ['  MINDATE ,  2001 ,  MAXDATE ,  2020 ,  GSDR_Temp,GEMS,NOSHOWS,NOSHOW_DATE  ']
data in row number 5939 is ['  MINDATE ,  0 ,  MAXDATE ,  0 ,  GSDR_Temp,GEMS,SASI_ACNF,NOTIFDT  ']
data in row number 5940 is ['  MINDATE ,  0 ,  MAXDATE ,  0 ,  GSDR_Temp,GEMS,SASI_ACNF,NOTIFDT  ']
data in row number 5941 is ['  MINDATE ,  2001 ,  MAXDATE ,  2021 ,  GSDR_Temp,GEMS,SASI_ELL_PARENT_CONF,NOTIFICATION_DATE  ']
data in row number 5942 is ['  MINDATE ,  2001 ,  MAXDATE ,  2029 ,  GSDR_Temp,GEMS,SASI_ELL_PARENT_CONF,NOTIFICATION_DATE  ']
data in row number 5943 is ['  MINDATE ,  0 ,  MAXDATE ,  2049 ,  GSDR_Temp,GEMS,SASI_UABN,NTCDATE  ']
data in row number 5944 is ['  MINDATE ,  0 ,  MAXDATE ,  2049 ,  GSDR_Temp,GEMS,SASI_UABN,N

In [17]:
  
# reading data from a csv file 'Data.csv'
with open(r'C:\Users\e201873842\Documents\Jupyter\Daily\pkg\PyMinMax\mmMergeN_temp2.csv', newline='') as file:
    reader = csv.reader(file, delimiter = ',')
    df = pd.DataFrame(reader, columns = [0]) 

df.head( n = 2)

Unnamed: 0,0
0,"MINDATE , 2016, MAXDATE , 2021, Applicatio..."
1,"MINDATE , None, MAXDATE , None, Applicatio..."


for a DataFrame :

df = pd.DataFrame([' 164', '164', '164 ', '  164  '])
df.applymap(trim_function)

For a Series

df = pd.Series([' 164', '164', '164 ', '  164  '])
df.apply(trim_function)

For an Index

df = pd.Index([' 164', '164', '164 ', '  164  '])
df.map(trim_function)


In [18]:

# reading data from a csv file 'Data.csv'
with open(r'C:\Users\e201873842\Documents\Jupyter\Daily\pkg\PyMinMax\mmMergeN_temp2.csv', newline='') as file:
    reader = csv.reader(file, delimiter = ',')
    df = pd.Series(reader)
    #df2 = df.values.tolist()
    #df = df.values.tolist()
    #by_column = [df[x].values.tolist() for x in df.columns]

trim_function = lambda x : re.findall("^\s*(.*?)\s*$",str(x))[0]
#df.apply(trim_function)
#df.applymap(trim_function)
#df.map(trim_function)
#df.info()
#by_column.info()

#Cov = pd.read_csv(r'C:\Users\e201873842\Documents\Jupyter\Daily\pkg\PyMinMax\mmMergeN_temp2.csv', sep=',', header=None)
#Cov.columns = ["MinText", "MinDate", "MaxText", "MaxDate", "DatabaseName", "SchemaName", "TableName", "ColumnName"]

df.head(n = 2)
#Cov.head(n = 2)

0    [  MINDATE , 2016,  MAXDATE , 2021,  Applicati...
1    [  MINDATE , None,  MAXDATE , None,  Applicati...
dtype: object

In [19]:

# reading data from a csv file 'Data.csv'
with open(r'C:\Users\e201873842\Documents\Jupyter\Daily\pkg\PyMinMax\mmMergeN_temp2.csv', newline='') as file:
    reader = csv.reader(file, delimiter = ',')
    df = pd.Series(reader)
    df2 = df.values.tolist()

df2

[['  MINDATE , 2016,  MAXDATE , 2021,  ApplicationData,SSIS_RunTime,RUN,WindowEnd  '],
 ['  MINDATE , None,  MAXDATE , None,  ApplicationData,SSIS_RunTime,RUN_PACKAGE_OVERRIDES,WindowBegin  '],
 ['  MINDATE , 1900,  MAXDATE , 2021,  ApplicationData,SSIS_RunTime,RUN,WindowBegin  '],
 ['  MINDATE , 2020,  MAXDATE , 2021,  ApplicationData,StudentRegistration,FACT_StudentCreditsEarned,UpdateDateTime  '],
 ['  MINDATE , 2020,  MAXDATE , 2021,  ApplicationData,StudentRegistration,FACT_StudentCoursesTaken,UpdateDateTime  '],
 ['  MINDATE , 2020,  MAXDATE , 2021,  ApplicationData,StudentRegistration,FACT_StudentAcademies,UpdateDateTime  '],
 ['  MINDATE , 2017,  MAXDATE , 2018,  ApplicationData,StudentRegistration,DIM_CoursePathways_12202018,UpdateDateTime  '],
 ['  MINDATE , 2017,  MAXDATE , 2021,  ApplicationData,StudentRegistration,DIM_CoursePathways,UpdateDateTime  '],
 ['  MINDATE , 2016,  MAXDATE , 2021,  ApplicationData,StudentProfile,MAP_EducatorStudentCourse,UpdateDateTime  '],
 ['  M

In [20]:

with open(r'C:\Users\e201873842\Documents\Jupyter\Daily\pkg\PyMinMax\mmMergeN_temp2.csv', newline='') as file:
    reader = csv.reader(file, delimiter = ',')
    df = pd.Series(reader)
    df = pd.DataFrame({'a': [' ', '\t', '  ']})
    for item in df.a:
        print (len(item))

#df.a = df.a.str.strip(' ')
#for item in df.a:
    #print (len(item))

1
1
2


In [21]:
reader = csv.reader(r'C:\Users\e201873842\Documents\Jupyter\Daily\pkg\PyMinMax\mmMergeN_temp2.csv', delimiter = ',')

df = pd.read_csv(StringIO(reader),       #Consider using six.StringIO instead of io.StringIO.
                 skipinitialspace=True,
                 parse_dates=True,
                 header=None,
                 index_col=0,
                 sep=',',
                 usecols=[0,1,2,3,4,5,6,7],
                 names='0 1 2 3 4 5 6 7 8 9'.split())



TypeError: initial_value must be str or None, not _csv.reader

In [None]:
df.head()

In [None]:
pd.read_csv(r'C:\Users\e201873842\Documents\Jupyter\Daily\pkg\PyMinMax\mmMergeN_new.csv', sep=',', header=None)

In [None]:
pd.read_csv(r'C:\Users\e201873842\Documents\Jupyter\Daily\pkg\PyMinMax\mmMergeN_new.csv', sep=',', usecols=["MinText", "MinDate", "MaxText", "MaxDate", "DatabaseName", "SchemaName", "TableName", "ColumnName"])