With this script, you can load large CSV log files (about 50 GB in total) into pandas on your laptop.

In [None]:
"""Import modules"""

import pandas as pd
import os, time


In [None]:
"""Get a list of all files with a path in a folder"""

def getListOfFiles(dirName):
    """Recursively collect the paths of all files below ``dirName``.

    Args:
        dirName: Directory to scan.

    Returns:
        list: One path per non-directory entry found in ``dirName`` or any
        of its subdirectories, each built by joining the parent folder
        with the entry name.
    """
    collected = []
    for name in os.listdir(dirName):
        candidate = os.path.join(dirName, name)
        if not os.path.isdir(candidate):
            collected.append(candidate)
            continue
        # Subdirectory: splice its files in at this position.
        collected.extend(getListOfFiles(candidate))
    return collected

"""Get a list of all filenames in a folder"""

def clear_list(list_files):
    r"""Strip the folder part from every path, keeping only the file name.

    Both ``\`` and ``/`` are accepted as separators, so the result does not
    depend on the operating system that produced the paths.

    Args:
        list_files: Iterable of path strings.

    Returns:
        list: One file name per input path, in the same order. A path
        without any separator is returned unchanged; a path that ends with
        a separator yields an empty string.
    """
    # Normalise the separator first so a single rsplit handles both styles.
    # Emitting exactly one name per path keeps the result aligned with the
    # input list instead of silently dropping separator-less entries.
    return [path.replace('\\', '/').rsplit('/', 1)[-1] for path in list_files]


In [None]:
"""Read filenames in folder"""

# Root folder to scan; replace the '...' placeholder with a real path
mypath = r'...' # path like C:\Program Files\Common Files

# Full paths of every file found under mypath, including subfolders
ListOfFiles = getListOfFiles(mypath)
# File names only, with the folder part removed
onlyfiles = clear_list(ListOfFiles)

# Bare expression: shows the list when it is the last statement of a notebook cell
ListOfFiles

Before loading, define which columns you actually need; don't load columns you won't use.
You can also specify column types to reduce memory usage, for example Int16 instead of Int64.

In [None]:
"""File reading options"""

# Tuple of columns to load (passed to read_csv as usecols).
# NOTE: col1, col2, ... are placeholders - replace them with real column names.
DF_COLS = (col1, col2,...)
# Field separator used in the .csv files (passed to read_csv as sep)
SEP = ','
# Character used to quote fields in the files (passed to read_csv as quotechar)
QUOTECHAR = '\"'
# Number of rows read per chunk (passed to read_csv as chunksize)
CHUNKSIZE = 100000
# Column data types (passed to read_csv as dtype), keyed by the same
# placeholder column names as DF_COLS.
DTYPE ={col1: 'Int16',
        col2: 'Int64'}

To optimize memory usage, don't load the temporary dataframes into the common dataframe in the same cell that reads them. Instead, collect the temporary dataframes in a list, and afterwards concatenate all list items into the common dataframe.

In [None]:
"""Uploading log files"""

# Formation of an empty data set (it is filled once every file has been read)
df_all = pd.DataFrame()

# Formation of a temporary list to optimize resources during loading
load_list = []

# Read log files and upload to list
for file_number, file_path in enumerate(ListOfFiles, start=1):
    start = time.time()
    # With chunksize set, read_csv returns an iterator of dataframes
    # instead of one dataframe holding the whole file.
    reader = pd.read_csv(file_path, sep=SEP, quotechar=QUOTECHAR, chunksize=CHUNKSIZE,
                         usecols=DF_COLS, dtype=DTYPE)

    # Drop repeated rows chunk by chunk so that a file with many duplicates
    # never has to be held in memory in full before it is reduced.
    temp_df = pd.concat([chunk.drop_duplicates() for chunk in reader])
    # A row may still repeat across chunk boundaries, so deduplicate once
    # more; the result equals deduplicating the whole file in one go.
    temp_df.drop_duplicates(inplace=True)

    load_list.append(temp_df)
    end = time.time()
    print(f'{file_number} file out of {len(ListOfFiles)}',
          f'Elapsed time: {end - start}',
          f'File name: {file_path}',
          '----------------------------', sep='\n')


In [None]:
"""Loading data from the list and combining it into a common dataset"""

start = time.time()

# Combine every per-file dataframe in a single call. DataFrame.append was
# removed in pandas 2.0, and appending inside a loop re-copies all rows
# gathered so far on each iteration. pd.concat raises on an empty list,
# so fall back to an empty dataframe when nothing was loaded.
df_all = pd.concat(load_list) if load_list else pd.DataFrame()

end = time.time()

print(f'{len(load_list)} file out of {len(load_list)}',
      f'Elapsed time: {(end - start)}', sep='\n')

df_all.info()

In [None]:
"""Removing temporary objects"""

# Unbind the intermediate objects (left to right) so their memory can be
# reclaimed; the combined dataframe is all that is needed from here on.
del temp_df, load_list


In [None]:
"""Data ready for processing"""

# Preview the first 5 rows of the combined dataset
df_all.head(5)