# Analyzing Responses of LLM to prompt

In [1]:
# first we need to import the basic libraries
# date
from datetime import datetime
now = datetime.now()
print(f"Date: {now}")
# python version
import sys
print(f"Python version: {sys.version}")
from pathlib import Path
import json
# import time for delay
import time
import requests
# print version
print(f"Requests version: {requests.__version__}")

from tqdm import tqdm

import pandas as pd
print(f"Pandas version: {pd.__version__}")

Date: 2025-02-28 17:50:54.356760
Python version: 3.12.7 (tags/v3.12.7:0b05ead, Oct  1 2024, 03:06:41) [MSC v.1941 64 bit (AMD64)]
Requests version: 2.32.3
Pandas version: 2.2.2


In [2]:
# locate the folder that holds the saved LLM responses and report whether it is usable
data_folder = Path("../data") / "responses"
print("Data folder:", data_folder)
print("Data folder exists:", data_folder.exists())
print("Data folder is dir:", data_folder.is_dir())
# every directory directly below the data folder is one batch of responses
subfolders = [entry for entry in data_folder.iterdir() if entry.is_dir()]
print("Subfolders:")
for subfolder in subfolders:
    print(subfolder)

Data folder: ..\data\responses
Data folder exists: True
Data folder is dir: True
Subfolders:
..\data\responses\2025_01_28_gemini_2_experimental
..\data\responses\2025_01_29_google_gemini-flash-1.5-8b_no_terms
..\data\responses\2025_01_29_google_gemini-flash-1.5-8b_with_terms
..\data\responses\2025_02_04_google_gemini-flash-1.5-8b_with_terms
..\data\responses\2025_02_26_google_gemini-flash-1.5_land_prompt_1
..\data\responses\2025_02_26_google_gemini-flash-1.5_land_prompt_2
..\data\responses\2025_02_27_google_gemini-2.0-flash-001_land_prompt
..\data\responses\2025_02_27_google_gemini-2.0-flash-001_land_prompt_2
..\data\responses\2025_02_27_google_gemini-flash-1.5_maritime_prompt
..\data\responses\2025_02_28_google_gemini-flash-1.5_air_prompt
..\data\responses\consolidated_2025_02_26_openai_gpt-4o-2024-11-20_land_prompt
..\data\responses\consolidated_2025_02_26_openai_gpt-4o-2024-11-20_land_prompt_2
..\data\responses\temp_responses_2025_02_26
..\data\responses\unconsolidated_2025_02_26_op

## Consolidate openai responses

The OpenAI prompts required us to split the input files into smaller chunks, so each document has several response files. Here we consolidate those chunk responses back into a single file per document.

In [4]:
# raw (chunked) OpenAI response folders: directories with "openai" in their name
# the consolidation step further down writes its output to "consolidated_<folder name>", which also
# contains "openai" - skip those so that re-running the notebook does not consolidate already
# consolidated output into "consolidated_consolidated_..." folders
# sorted() keeps openai_folders[0] the same on every run (iterdir order is arbitrary)
openai_folders = sorted(
    f for f in data_folder.iterdir()
    if f.is_dir() and "openai" in f.name and not f.name.startswith("consolidated_")
)
print("OpenAI folders:")
for openai_folder in openai_folders:
    print(openai_folder)
    

OpenAI folders:
..\data\responses\2025_02_26_openai_gpt-4o-2024-11-20_land_prompt
..\data\responses\2025_02_26_openai_gpt-4o-2024-11-20_land_prompt_2


In [5]:
# we want to create a function that given a subfolder will return a dictionary 
# keys will be first three parts of file name when split by _
# values will be actual file names
def get_files(subfolder):
    """Group the files of a folder by document.

    The document key is the first three "_"-separated parts of the file stem
    (for example "AustA_Puisk_1047362" for "AustA_Puisk_1047362_2.txt").
    The stem is used instead of the full name so that a file without a chunk
    suffix ("AustA_Puisk_1047362.txt") gets the same key and not one ending in ".txt".

    Args:
        subfolder: pathlib.Path of the folder to scan (sub-directories are ignored).

    Returns:
        dict mapping document key -> list of pathlib.Path objects. Each list is
        ordered by the numeric value of the trailing chunk index, so "_2" comes
        before "_10"; files without a numeric chunk index come last, by name.
    """
    def _chunk_order(path):
        # the chunk index is the last "_" part of the stem, when there is one beyond the key
        stem_parts = path.stem.split("_")
        tail = stem_parts[-1] if len(stem_parts) > 3 else ""
        if tail.isdecimal():
            return (0, int(tail), path.name)
        return (1, 0, path.name)

    files = {}
    for file in subfolder.iterdir():
        if file.is_file():
            key = "_".join(file.stem.split("_")[:3])
            files.setdefault(key, []).append(file)
    # iterdir() yields entries in arbitrary order, so fix the chunk order explicitly
    for chunk_files in files.values():
        chunk_files.sort(key=_chunk_order)
    return files

# try the helper on the first OpenAI folder and show every document with its chunk files
openai_files = get_files(openai_folders[0])
print("OpenAI files:")
for document_key in openai_files:
    print(f"{document_key}: {openai_files[document_key]}")

OpenAI files:
AustA_KaspG_948026: [WindowsPath('../data/responses/2025_02_26_openai_gpt-4o-2024-11-20_land_prompt/AustA_KaspG_948026_0.txt')]
AustA_Puisk_1047362: [WindowsPath('../data/responses/2025_02_26_openai_gpt-4o-2024-11-20_land_prompt/AustA_Puisk_1047362_0.txt'), WindowsPath('../data/responses/2025_02_26_openai_gpt-4o-2024-11-20_land_prompt/AustA_Puisk_1047362_1.txt'), WindowsPath('../data/responses/2025_02_26_openai_gpt-4o-2024-11-20_land_prompt/AustA_Puisk_1047362_2.txt')]
FimbK_KadNa_1049450: [WindowsPath('../data/responses/2025_02_26_openai_gpt-4o-2024-11-20_land_prompt/FimbK_KadNa_1049450_0.txt'), WindowsPath('../data/responses/2025_02_26_openai_gpt-4o-2024-11-20_land_prompt/FimbK_KadNa_1049450_1.txt')]
FimbK_TiltP_1049479: [WindowsPath('../data/responses/2025_02_26_openai_gpt-4o-2024-11-20_land_prompt/FimbK_TiltP_1049479_0.txt'), WindowsPath('../data/responses/2025_02_26_openai_gpt-4o-2024-11-20_land_prompt/FimbK_TiltP_1049479_1.txt')]
GulbA_Gaidi_1350352: [WindowsPath('.

In [11]:
# group the chunk files of every OpenAI folder
# result: {folder name: {document key: [chunk files]}}
openai_files = {folder.name: get_files(folder) for folder in openai_folders}

# how many documents (keys) did we find in each folder
for folder_name, grouped in openai_files.items():
    print(f"{folder_name}: {len(grouped)}")

2025_02_26_openai_gpt-4o-2024-11-20_land_prompt: 20
2025_02_26_openai_gpt-4o-2024-11-20_land_prompt_2: 20


In [15]:
# now let's write a function that given a file name, a list of chunk files and new_subfolder
# writes one consolidated file to new_subfolder
# each chunk file holds the response lines first and then everything from "System prompt:" onwards
# the consolidated file holds the response lines of all chunks, an empty line, and the prompt section
# we use utf-8 encoding
def consolidate_files(file_name, file_list, new_subfolder):
    """Merge the chunk files of one document into "<file_name>.txt" inside new_subfolder.

    Every chunk is split at the first "System prompt:" marker. The text before the
    marker (stripped) is the chunk's responses; the text after it is the prompt section.

    The output consists of the responses of all chunks in file_list order, one block
    per chunk, followed by an empty line and the prompt section. The prompt section is
    written once when it is identical in every chunk, otherwise once per chunk.
    The empty line is always written before a prompt section because the rest of the
    notebook treats the first empty line as the end of the responses.

    Chunks without any response text contribute nothing (no stray empty line), and
    chunks without the marker are treated as responses only.

    Args:
        file_name: name of the consolidated file, without extension.
        file_list: iterable of paths to the chunk files, in the order to merge them.
        new_subfolder: pathlib.Path of the output folder; created if missing.
    """
    marker = "System prompt:"
    response_blocks = []
    system_prompts = []
    # read everything first so that a failing read cannot leave a half-written output file
    for file in file_list:
        with open(file, "r", encoding="utf-8") as old_file:
            text = old_file.read()
        responses, found, prompt = text.partition(marker)
        responses = responses.strip()
        if responses:
            response_blocks.append(responses)
        if found:
            system_prompts.append(prompt)

    # identical prompt sections are written only once
    if len(set(system_prompts)) == 1:
        prompts_to_write = system_prompts[:1]
    else:
        prompts_to_write = system_prompts

    # create new subfolder if it does not exist
    new_subfolder.mkdir(parents=True, exist_ok=True)
    with open(new_subfolder / f"{file_name}.txt", "w", encoding="utf-8") as new_file:
        for block in response_blocks:
            new_file.write(block + "\n")
        for prompt in prompts_to_write:
            # leading "\n" gives the empty line that separates responses from prompts
            new_file.write("\n" + marker + prompt)

# test it on second key of openai_files
# we will create a new subfolder in the data respones folder
# new_subfolder = data_folder / "consolidated"
# consolidate_files(list(openai_files.keys())[1], openai_files[list(openai_files.keys())[1]], new_subfolder)



In [16]:
# now let's write a function that will consolidate all files in all openai folders
# we will use consolidate_files function
# new subfolder will be in data folder
# it will be called consolidated_ + key of openai_files
def consolidate_all_files(openai_files, data_folder):
    """Write one consolidated file per document for every folder in openai_files.

    Args:
        openai_files: dict mapping folder name -> {document key: list of chunk files}.
        data_folder: pathlib.Path under which the "consolidated_<folder name>"
            output folders are placed.
    """
    for folder_name, grouped in openai_files.items():
        destination = data_folder / ("consolidated_" + folder_name)
        # one consolidated file per document key
        for document_key in grouped:
            consolidate_files(document_key, grouped[document_key], destination)

# let's run this function
consolidate_all_files(openai_files, data_folder)

## Getting the subfolders for analysis

In [19]:
# collect the folders that take part in the land prompt comparison, all of them inside data_folder
# first the consolidated folders dated 2025_02_26 or 2025_02_27
folders_to_analyze = [
    entry for entry in data_folder.iterdir()
    if entry.is_dir() and entry.name.startswith(("consolidated_2025_02_26", "consolidated_2025_02_27"))
]
# then the folders dated 2025_02_26 or 2025_02_27 whose name contains land_prompt
folders_to_analyze.extend(
    entry for entry in data_folder.iterdir()
    if entry.is_dir() and entry.name.startswith(("2025_02_26", "2025_02_27")) and "land_prompt" in entry.name
)
print("Folders to analyze:")
for folder in folders_to_analyze:
    print(folder)

Folders to analyze:
..\data\responses\consolidated_2025_02_26_openai_gpt-4o-2024-11-20_land_prompt
..\data\responses\consolidated_2025_02_26_openai_gpt-4o-2024-11-20_land_prompt_2
..\data\responses\2025_02_26_google_gemini-flash-1.5_land_prompt_1
..\data\responses\2025_02_26_google_gemini-flash-1.5_land_prompt_2
..\data\responses\2025_02_27_google_gemini-2.0-flash-001_land_prompt
..\data\responses\2025_02_27_google_gemini-2.0-flash-001_land_prompt_2


## Reading plaintexts into memory

In [9]:
# Plaintexts are in another repo - private in our parent folder
# the path gets its own name: rebinding data_folder here would silently point every later cell
# that builds paths from data_folder at the plaintext repo instead of ../data/responses
# earlier locations:
# plaintext_folder = Path("../data/docs")
# plaintext_folder = Path("../../lnb_lat_sen_rom_releases/lat_sen_rom_2025_01_28")
plaintext_folder = Path("../../lnb_lat_sen_rom_releases/lat_sen_rom_2025_02_04")
# assert folder exists
assert plaintext_folder.exists(), f"Folder {plaintext_folder} does not exist"

# list all text files, sorted so that the load order is the same on every run
files = sorted(plaintext_folder.glob("*.txt"))
# how many files do we have?
print(f"Number of files: {len(files)}")
# min() and max() below raise a bare ValueError on an empty dictionary, so stop here with a clear message
assert files, f"No .txt files found in {plaintext_folder}"
# let's load the files into a dictionary with filename stem as key and text as value
# the text is decoded as utf-8
texts = {}
for file in tqdm(files):
    texts[file.stem] = file.read_text(encoding="utf-8")
# how many texts do we have?
print(f"Number of texts: {len(texts)}")
# how many characters do we have in total?
total_chars = sum(len(text) for text in texts.values())
print(f"Total characters: {total_chars}")
# what is the smallest text?
min_text = min(texts, key=lambda x: len(texts[x]))
print(f"Key for smallest text: {min_text}")
# how many characters does the smallest text have?
min_chars = len(texts[min_text])
print(f"Number of characters in smallest text: {min_chars}")
# what is the largest text?
max_text = max(texts, key=lambda x: len(texts[x]))
print(f"Key for largest text: {max_text}")
# how many characters does the largest text have?
max_chars = len(texts[max_text])
print(f"Number of characters in largest text: {max_chars}")


Number of files: 458


100%|██████████| 458/458 [00:07<00:00, 61.34it/s]

Number of texts: 458
Total characters: 191069647
Key for smallest text: VentA_DepuT_1293527
Number of characters in smallest text: 18648
Key for largest text: DeglA_LabaF_1053655
Number of characters in largest text: 2375090





## Comparing responses

In [23]:
# first let's assert that all our folders have identical file names
# we will use the first folder as reference
# iterdir() yields entries in arbitrary order, so we sort the names before comparing -
# otherwise two folders with the same contents could fail the check
# reference_files (sorted list of file names) is reused by later cells
reference_files = sorted(entry.name for entry in Path(folders_to_analyze[0]).iterdir())
for folder in folders_to_analyze[1:]:
    # separate name so that we do not overwrite the plaintext file list called files
    folder_files = sorted(entry.name for entry in Path(folder).iterdir())
    assert reference_files == folder_files, f"Files in {folders_to_analyze[0]} and {folder} are not identical"

print("All files are identical")

All files are identical


In [8]:
# let's write a function that given a file will extract all response lines
# response lines are those that come before empty line
# we will return a list of response lines
def get_response_lines(file):
    """Return the response lines of a response file.

    The response lines are all lines before the first line that is empty or
    contains only whitespace. Each returned line is stripped of surrounding whitespace.

    Args:
        file: path of the utf-8 encoded response file.

    Returns:
        list of str, possibly empty.
    """
    with open(file, "r", encoding="utf-8") as handle:
        stripped = [raw.strip() for raw in handle.readlines()]
    # the first blank line marks the end of the responses; without one the whole file counts
    try:
        cutoff = stripped.index("")
    except ValueError:
        cutoff = len(stripped)
    return stripped[:cutoff]

# test on first file in reference folder
# response_lines = get_response_lines(Path(folders_to_analyze[0]) / reference_files[0])
# print(f"Response lines: {response_lines}")

In [27]:
folders_to_analyze

[WindowsPath('../data/responses/consolidated_2025_02_26_openai_gpt-4o-2024-11-20_land_prompt'),
 WindowsPath('../data/responses/consolidated_2025_02_26_openai_gpt-4o-2024-11-20_land_prompt_2'),
 WindowsPath('../data/responses/2025_02_26_google_gemini-flash-1.5_land_prompt_1'),
 WindowsPath('../data/responses/2025_02_26_google_gemini-flash-1.5_land_prompt_2'),
 WindowsPath('../data/responses/2025_02_27_google_gemini-2.0-flash-001_land_prompt'),
 WindowsPath('../data/responses/2025_02_27_google_gemini-2.0-flash-001_land_prompt_2')]

In [12]:
# now let's write a function that given a file and texts dictionary will return a dataframe with two columns
# first column will be terms sorted from get_response_lines (could be duplicates)
# second column will count of occurences of term in text from matching key in texts dictionary
# key will be file name stem
def get_response_df(file, texts):
    """Build a table of the response terms of one file and how often each occurs in its plaintext.

    Args:
        file: pathlib.Path of the response file; file.stem selects the plaintext
            and the name of the parent folder becomes the name of the term column.
        texts: dict mapping file stem -> plaintext.

    Returns:
        pandas.DataFrame with two columns: the term column (sorted response lines,
        duplicates kept) and "count", the number of non-overlapping occurrences of
        the term as a substring of the plaintext (str.count, so a term is also
        counted inside longer words). When no plaintext is found a message is
        printed and every count is 0.
        Both columns are present even when the file has no response lines, so the
        frame never serialises to a completely empty CSV file.
    """
    response_lines = get_response_lines(file)
    plaintext = texts.get(file.stem, "")
    # our term column name will be parent folder name of file
    term_column = file.parent.name
    if plaintext == "":
        print(f"Plaintext not found for {file.stem}")
    data = [
        {term_column: line, "count": plaintext.count(line)}
        for line in sorted(response_lines)
    ]
    # explicit columns keep the header in place when data is empty
    return pd.DataFrame(data, columns=[term_column, "count"])

# test on first file in reference folder
# df = get_response_df(Path(folders_to_analyze[0]) / reference_files[0], texts)
# print(f"Response dataframe:")
# df.head()

In [28]:
reference_files[0]

'AustA_KaspG_948026.txt'

In [6]:
# now let's write a function tht given a file and list of subfolders will return a combined dataframe
# columns will obtained by horizontally concatenating dataframes obtained by get_response_df
# index will be numerical
def get_combined_df(file, texts, subfolders):
    """Place the response tables of one file from several folders side by side.

    Args:
        file: file name that is looked up inside every folder of subfolders.
        texts: dict mapping file stem -> plaintext, passed on to get_response_df.
        subfolders: iterable of pathlib.Path folders.

    Returns:
        pandas.DataFrame made by concatenating the per-folder tables along the
        column axis, in the order of subfolders.
    """
    per_folder_frames = [get_response_df(folder / file, texts) for folder in subfolders]
    return pd.concat(per_folder_frames, axis=1)

# let's test it on first file in reference folder
# df = get_combined_df(reference_files[0], texts, folders_to_analyze)
# print(f"Combined dataframe:")
# df.head()



In [7]:
# now let's create a function that will create a CSV file for each file in reference folder
# we will use get_combined_df to get the dataframe
# we will supply target folder where we want to save the CSV files
def create_csv_files(reference_files, texts, subfolders, target_folder, save_excel=True):
    """Write the combined response table of every file to target_folder.

    Args:
        reference_files: iterable of file names; each is combined with get_combined_df.
        texts: dict mapping file stem -> plaintext.
        subfolders: folders whose responses are combined.
        target_folder: pathlib.Path of the output folder; created if missing.
        save_excel: when True an .xlsx copy is written next to every .csv.
    """
    # make sure the output folder is there before writing anything
    target_folder.mkdir(parents=True, exist_ok=True)

    for file_name in reference_files:
        combined = get_combined_df(file_name, texts, subfolders)
        stem = Path(file_name).stem
        combined.to_csv(target_folder / f"{stem}.csv", index=False)
        if not save_excel:
            continue
        combined.to_excel(target_folder / f"{stem}.xlsx", index=False)

# let's test it on reference files
# target folder will be data folder with name analysis and datetime stamp
# target_folder = Path("../data") / "analysis" / now.strftime("%Y_%m_%d_%H_%M_%S")
# create_csv_files(reference_files, texts, folders_to_analyze, target_folder)

In [32]:
# NOTE(review): in the notebook as shown, the only assignment of target_folder before this cell is in
# the commented-out lines of the previous cell, so on a fresh kernel this cell raises NameError;
# the printed value below comes from an earlier session
print(target_folder)

..\..\lnb_lat_sen_rom_releases\lat_sen_rom_2025_02_04\analysis\2025_02_27_21_05_04


## Maritime Analysis

In [36]:
# response folders produced with the maritime prompt: directories whose name contains maritime_prompt
data_folder = Path("../data") / "responses"
maritime_folders = [
    entry for entry in data_folder.iterdir()
    if "maritime_prompt" in entry.name and entry.is_dir()
]
print("Maritime folders:")
for folder in maritime_folders:
    print(folder)

Maritime folders:
..\data\responses\2025_02_27_google_gemini-flash-1.5_maritime_prompt


In [40]:
# names of all entries in the first maritime folder
maritime_files = [entry.name for entry in Path(maritime_folders[0]).iterdir()]
print("Maritime files:")
# how many did we get?
print("Number of files:", len(maritime_files))
# show the first 5 names
for file in maritime_files[:5]:
    print(file)

Maritime files:
Number of files: 458
AizsV_MilaU_1049452.txt
AkurJ_DegoS_771400.txt
AkurJ_PeteD_886346.txt
AkurJ_UgunZ_1049441.txt
Andra_Elita_1053573.txt


In [41]:
# write the per-document tables for the maritime responses
# output goes to ../data/analysis_maritime/<datetime stamp of this notebook run>
target_folder = Path("../data").joinpath("analysis_maritime", now.strftime("%Y_%m_%d_%H_%M_%S"))
create_csv_files(maritime_files, texts, maritime_folders, target_folder)

## Air transport Analysis

In [3]:
# response folders produced with the air prompt: directories whose name contains air_prompt
data_folder = Path("../data") / "responses"
air_folders = [
    entry for entry in data_folder.iterdir()
    if "air_prompt" in entry.name and entry.is_dir()
]
print("Air folders:")
for folder in air_folders:
    print(folder)

Air folders:
..\data\responses\2025_02_28_google_gemini-flash-1.5_air_prompt


In [10]:
# names of all entries in the first air folder
air_files = [entry.name for entry in Path(air_folders[0]).iterdir()]

In [13]:
# write the per-document tables for the air responses (csv only, no excel copies)
# output goes to ../data/analysis_air/<datetime stamp of this notebook run>
target_folder = Path("../data").joinpath("analysis_air", now.strftime("%Y_%m_%d_%H_%M_%S"))
create_csv_files(air_files, texts, air_folders, target_folder, save_excel=False)

## Combining responses into unique response document frequency counts

Next we will combine all responses to a specific prompt into a single dataframe. We will use a parquet file with lemmatized results as the reference to compare against.

In [16]:
# the lemmatized corpus is a parquet file kept outside of our repository
src = Path("../..") / "not_repo" / "latsenrom_2025_02_05.parquet"
# stop right away if the file is missing
assert src.exists(), f"File {src} does not exist"
df = pd.read_parquet(src)
# rows and columns
print("Shape:", df.shape)
# a few random rows
df.sample(5)

Shape: (37605476, 17)


Unnamed: 0,deprel,form,index,lemma,parent,pos,tag,ufeats,upos,sent_ndx,author,title,dom_id,file_stem,file_stem_short,firstEdition,term
783722,,dažas,17,daža,,pi0fpa_,pi0fpan,,,978,AustA,GaraJ,1025406,AustA_GaraJ_1025406,AustA_GaraJ,1926,daža
20251178,root,iebrēcās,16,iebrēkties,0.0,vm_is__30__,vmyis_130an,Evident=Fh|Mood=Ind|Person=3|Polarity=Pos|Refl...,VERB,865,LeitA,TrimA,1296709,LeitA_TrimA_1296709,LeitA_TrimA,1933,iebrēkties
12905910,root,papīrīts,5,papīrīt,0.0,vm_pdmsn_sn,vmnpdmsnpsnpn,Aspect=Perf|Case=Nom|Definite=Ind|Degree=Pos|G...,VERB,909,JaunJ,NaveD,1053660,JaunJ_NaveD_1053660,JaunJ_NaveD,1924,papīrīt
6965353,,nebūtībā,16,nebūtība,,ncfsl_,ncfsl4,,,484,ErssA,MileV,1047111,ErssA_MileV_1047111,ErssA_MileV,1930,nebūtība
23745117,,klajā,10,klajā,,r_p,r0_,,,133,PaulM,UzDzi,1049498,PaulM_UzDzi_1049498,PaulM_UzDzi,1938,klajā


In [None]:
# let's compare the set of file stems in our analysis folder with the set of stems in our parquet file

# every entry in our target folder
air_analysis_files = [entry for entry in target_folder.iterdir()]
# stems of those entries
air_analysis_stems = set(entry.stem for entry in air_analysis_files)
# distinct file stems recorded in our parquet file
parquet_stems = set(df["file_stem"].unique())
# how many total of each?
print(f"Total stems in air analysis folder: {len(air_analysis_stems)}")
print(f"Total stems in parquet file: {len(parquet_stems)}")
# which stems are on one side only?
print(f"Stems in air analysis folder but not in parquet file: {air_analysis_stems.difference(parquet_stems)}")
print(f"Stems in parquet file but not in air analysis folder: {parquet_stems.difference(air_analysis_stems)}")

# we can see that there have been some changes in the file names but it should not meaningfully affect our analysis


Total stems in air analysis folder: 458
Total stems in parquet file: 485
Stems in air analysis folder but not in parquet file: {'RoziP_DivaS_1053490', 'JansJ_Dzimt_1051747', 'ZariK_DzivU_1051770', 'MateJ_PatrU_1293506', 'Anoni_BandK_419229', 'CeplA_Zeme_882015', 'JaunJ_Aija_656227', 'KaijI_IedzG_1296798', 'NiedA_LiduD_907728', 'VecoJ_ViktH_1296782', 'DeglA_Zelte_414397', 'JaunJ_BaltG_413159', 'KaijI_Juga_886317', 'UpitA_JaunA_103001', 'NiedA_Siksp_1544155', 'KaudR_MernL_771080', 'EldgH_ZvaiN_771102', 'DeglA_Patri_66131', 'JatnG_TevPi_1049477', 'ZeibJ_BaroB_1293562', 'KaijI_Dzint_1053686', 'UpitA_ZidaT_869211', 'KaijI_Sfink_886333', 'SpriJ_NaveL_1053548', 'MateJ_SadzV_416277'}
Stems in parquet file but not in air analysis folder: {'JansJ_Dzim_1051747', 'JatnG_TevPi_049477', 'KaudR_MernL_413085', 'KabeV_DzelD_1049482', 'UpitA_ZemNa_102999', 'UpitA_ZidaT_869228', 'FimbK_PecPu_1051725', 'UpitA_JaunA_1040993', 'RudzE_CaurE_886344', 'CeplA_Zeme1_882015', 'RoziP_DivaS_957619', 'NiedAn_LiduD_4

In [20]:
target_folder

WindowsPath('../data/analysis_air/2025_02_28_17_50_54')

In [39]:
from pandas.errors import EmptyDataError # we need to import this error
# now let's create a function that takes a subfolder with responses and returns a dataframe with unique terms and their combined counts
# to do so we will go through all csv files in subfolder
# we will extract rows with unique terms and their counts
# and then we will merge them into a single dataframe by adding counts
# we will return this dataframe
def get_combined_response_df(subfolder, verbose=False):
    """Combine the per-document CSV files of a folder into term and document frequencies.

    Every ".csv" file in subfolder is read as one document. Exact duplicate rows are
    dropped, the two columns are renamed to "term" and "term_freq", and a "doc_freq"
    column with value 1 is added. All documents are then stacked and summed per term.
    The renaming assigns exactly two names, so files with a different number of
    columns are not supported.

    Files that are completely empty are reported and skipped (EmptyDataError);
    files that only contain a header contribute no rows and are skipped as well.

    Args:
        subfolder: pathlib.Path of the folder with the per-document CSV files.
        verbose: when True, print the shape of the first 5 loaded frames.

    Returns:
        pandas.DataFrame with columns "term", "term_freq" and "doc_freq", sorted by
        "doc_freq" in descending order. When the folder has no usable CSV file an
        empty frame with these three columns is returned.
    """
    dfs = []
    for file in tqdm(subfolder.iterdir()):
        if file.suffix != ".csv":
            continue
        try:
            df = pd.read_csv(file)
        except EmptyDataError:
            print(f"Empty data error for {file}")
            continue
        if df.empty:
            # header only: no terms for this document
            continue
        # a repeated term has the same count, so dropping duplicate rows leaves one row per term
        df = df.drop_duplicates()
        df.columns = ["term", "term_freq"]
        # each remaining row stands for one document that contains the term
        df["doc_freq"] = 1
        dfs.append(df)
    if verbose:
        # print shape of first 5 dataframes
        for frame in dfs[:5]:
            print(frame.shape)
    if not dfs:
        # pd.concat raises ValueError on an empty list
        return pd.DataFrame(columns=["term", "term_freq", "doc_freq"])
    df = pd.concat(dfs).groupby("term").sum().reset_index()
    # most widely used terms first
    df = df.sort_values("doc_freq", ascending=False)
    return df



In [37]:
# combine the per-document csv files that are currently in target_folder
air_df = get_combined_response_df(target_folder)
# rows and columns
print("Shape:", air_df.shape)
# first 10 rows
air_df.head(10)

458it [00:00, 948.89it/s]

Shape: (1744, 3)





Unnamed: 0,term,term_freq,doc_freq
1504,vilciens,566,102
771,laivas,925,95
160,auto,1875,83
582,kamanas,149,71
1176,rati,2222,63
727,kuģis,480,60
786,laivu,502,56
825,lidmašīna,546,52
722,kuģi,1513,52
1386,tvaikonis,158,50


In [28]:
# save the combined air table next to target_folder (in its parent folder), without the index
# file name: air_combined_<datetime stamp>.csv
target_file = Path(target_folder).parent.joinpath(now.strftime("air_combined_%Y_%m_%d_%H_%M_%S.csv"))
air_df.to_csv(target_file, index=False)

## Extracting tf and df for maritime responses

In [29]:
# same aggregation for the maritime analysis output
# note that maritime_folders now holds the analysis output folders, not the response folders
maritime_target_parent = Path("../data", "analysis_maritime")
# every directory below it is one analysis run
maritime_folders = [entry for entry in maritime_target_parent.iterdir() if entry.is_dir()]
print("Maritime folders:")
for folder in maritime_folders:
    print(folder)

Maritime folders:
..\data\analysis_maritime\2025_02_27_21_05_04


In [32]:

maritime_target_folder = Path(maritime_folders[0])
maritime_target_folder

WindowsPath('../data/analysis_maritime/2025_02_27_21_05_04')

In [40]:
# combine the per-document csv files of the maritime run, printing the first few frame shapes
maritime_df = get_combined_response_df(maritime_target_folder, verbose=True)
# rows and columns
print("Shape:", maritime_df.shape)
# first 10 rows
maritime_df.head(10)

604it [00:00, 1956.77it/s]

Empty data error for ..\data\analysis_maritime\2025_02_27_21_05_04\RozeL_StipK_964055.csv


916it [00:01, 498.51it/s] 

(11, 3)
(5, 3)
(7, 3)
(14, 3)
(2, 3)
Shape: (1240, 3)





Unnamed: 0,term,term_freq,doc_freq
560,laivas,1146,183
492,kuģis,770,138
485,kuģi,2492,127
843,rati,2821,86
580,laivu,590,78
581,laivā,521,65
1003,tvaikonis,196,64
1118,vilciens,362,63
397,kamanas,141,60
558,laiva,944,50


In [41]:
# save the combined maritime table in maritime_target_parent, without the index
# file name: maritime_term_tf_doc_tf_<datetime stamp>.csv
target_file = maritime_target_parent.joinpath(now.strftime("maritime_term_tf_doc_tf_%Y_%m_%d_%H_%M_%S.csv"))
maritime_df.to_csv(target_file, index=False)