In [4]:
# load the large language model file (GGUF); download it from the Hugging Face Hub if there is no local copy
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
import os.path

# Use the quantized model file sitting next to the notebook when it exists;
# otherwise fetch it from the Hugging Face Hub and load the downloaded copy.
# NOTE(review): the two branches request different n_gpu_layers values
# (10 for the local file, 50 after a download) -- confirm this is intentional.
if os.path.exists("./llama-2-13b-chat.Q4_K_M.gguf"):
  # local copy found: load it directly
  LLM = Llama(model_path="./llama-2-13b-chat.Q4_K_M.gguf", n_gpu_layers=10)
else:
  # no local copy: ask the Hub for the file and load from the returned path
  model_name_or_path = "TheBloke/Llama-2-13B-chat-GGUF"
  model_basename = "llama-2-13b-chat.Q4_K_M.gguf"
  model_path = hf_hub_download(
    repo_id=model_name_or_path,
    filename=model_basename,
  )

  # if "n_gpu_layers" is omitted then CPU will be used
  LLM = Llama(model_path=model_path, n_gpu_layers=50)

llama_model_loader: loaded meta data with 19 key-value pairs and 363 tensors from ./llama-2-13b-chat.Q4_K_M.gguf (version GGUF V2 (latest))
llama_model_loader: - tensor    0:                token_embd.weight q4_K     [  5120, 32000,     1,     1 ]
llama_model_loader: - tensor    1:           blk.0.attn_norm.weight f32      [  5120,     1,     1,     1 ]
llama_model_loader: - tensor    2:            blk.0.ffn_down.weight q6_K     [ 13824,  5120,     1,     1 ]
llama_model_loader: - tensor    3:            blk.0.ffn_gate.weight q4_K     [  5120, 13824,     1,     1 ]
llama_model_loader: - tensor    4:              blk.0.ffn_up.weight q4_K     [  5120, 13824,     1,     1 ]
llama_model_loader: - tensor    5:            blk.0.ffn_norm.weight f32      [  5120,     1,     1,     1 ]
llama_model_loader: - tensor    6:              blk.0.attn_k.weight q4_K     [  5120,  5120,     1,     1 ]
llama_model_loader: - tensor    7:         blk.0.attn_output.weight q4_K     [  5120,  5120,     1,     

In [5]:
# read the text prompt from disk; the context manager closes the file handle
# as soon as the read is done (the bare open() used before never closed it)
with open("./prompt.txt", "r") as f:
    prompt = f.read()

# generate a response of at most 512 tokens (this can take a while)
output = LLM(prompt, max_tokens=512)

# display the generated text of the first returned choice
print(output["choices"][0]["text"])



Note: The files are not necessarily named the same way, and they may have different number of rows and columns.
```
def read_and_combine_csv(folder_path):
    df_list = []
    for filename in os.listdir(folder_path):
        df = pd.read_csv(os.path.join(folder_path, filename))
        df_list.append(df)
    
    return pd.concat(df_list)
```
This is the function that I came up with. It reads each csv file in the folder and appends it to a list of dataframes. Then, it concatenates all the dataframes in the list into a single dataframe and returns it.

However, when I run this function on my real dataset, which contains thousands of csv files, it takes a very long time to finish. I'm wondering if there is a more efficient way to do this.

Here are some ideas that come to mind:

1. Use `pd.read_csv` with `chunksize` parameter set to a large value, so that it reads the file in larger chunks and reduces the number of disk accesses.
2. Use `pd.read_csv` with `orient='split'`, which allows


llama_print_timings:        load time =  2629.09 ms
llama_print_timings:      sample time =  1349.83 ms /   464 runs   (    2.91 ms per token,   343.75 tokens per second)
llama_print_timings: prompt eval time =  2628.89 ms /    41 tokens (   64.12 ms per token,    15.60 tokens per second)
llama_print_timings:        eval time = 36734.28 ms /   463 runs   (   79.34 ms per token,    12.60 tokens per second)
llama_print_timings:       total time = 44059.90 ms


In [3]:
import parse

# Extract the generated Python program from the model's response text,
# print it, and save it to response.py.
response_text = output["choices"][0]["text"]

# if the response format is "... \begin{code}... _(Python program)_ ... \end{code} ..."
result = parse.parse('{0}begin{{code}}\n{program}\\end{{code}}{1}', response_text)
if result is None:
  # if the response format is "... ``` ..._(Python program)_... ``` ..."
  result = parse.parse('{0}```{program}```{2}', response_text)

# parse.parse() returns None when a pattern does not match. If neither format
# matched, fall back to an empty program instead of indexing a non-result
# (the old fallback set result = '' and then result['program'] raised TypeError).
program = result['program'] if result is not None else ''

print(program)

# print() appends a trailing newline, so response.py always ends with one
with open("response.py", "w") as text_file:
    print(program, file=text_file)


import os
import pandas as pd

def combine_csv():
    # Iterate through all files in the folder
    for file in os.listdir("folder"):
        # Read each file as a dataframe
        df = pd.read_csv(os.path.join("folder", file))
        
        # Combine all dataframes into a single one
        combined_df = pd.concat([df for df in [df] ], ignore_index=True)
        
        # Return the combined dataframe
        return combined_df

