In [1]:
import tabmemcheck
# Show the installed tabmemcheck version so the recorded outputs can be tied to a release.
tabmemcheck.__version__

'0.1.6'

In [2]:
import os
from together import Together

# Together API client. The key is read from the TOGETHER_API_KEY environment variable
# (os.environ.get yields None when it is unset), so no secret is hardcoded in the notebook.
client = Together(api_key=os.environ.get('TOGETHER_API_KEY'))

# Header test for different models and datasets (Table 2 in the paper)

In [5]:
# Datasets under test; every file name is prefixed with the shared data directory.
csv_files = [
    f'datasets/tabular/{file_name}'
    for file_name in (
        'titanic-train.csv',
        'adult-train.csv',
        'openml-diabetes.csv',
        'uci-wine.csv',
        'iris.csv',
        'spaceship-titanic-train.csv',
        'heloc_dataset_v1.csv',
    )
]

# (model name, chat_mode) pairs; the boolean is forwarded as `chat_mode`
# when the LLM wrapper is built for each model.
models = [
    ("allenai/OLMo-7B", False),
    ("google/gemma-2-27b-it", True),
    ("meta-llama/Meta-Llama-3-70B", False),
    ("Qwen/Qwen1.5-72B", False),
    ("meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo", True),
]

In [9]:
# Run the header test for every (model, dataset) combination.
for model, chat_mode in models:
    print(model)
    llm = tabmemcheck.llm.OpenAILLM(client=client, model=model, chat_mode=chat_mode)
    # The split rows depend only on the model, so choose them once per model:
    # the 405B model gets fewer, earlier splits (less context size for the largest model).
    split_rows = [2, 3, 4] if '405B' in model else [2, 4, 6, 8]
    for csv_file in csv_files:
        print(csv_file)
        header_prompt, header_completion, response = tabmemcheck.header_test(
            csv_file,
            llm,
            split_rows=split_rows,
            completion_length=350,
        )
    # Visual separator between models in the cell output.
    print("="*120)

meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo
datasets/tabular/heloc_dataset_v1.csv
[1mHeader Test: [0m[0;30mRiskPerformance,ExternalRiskEstimate,MSinceOldestTradeOpen,MSinceMostRecentTradeOpen,AverageMInFile,NumSatisfactoryTrades,NumTrades60Ever2DerogPubRec,NumTrades90Ever2DerogPubRec,PercentTradesNeverDelq,MSinceMostRecentDelq,MaxDelq2PublicRecLast12M,MaxDelqEver,NumTotalTrades,NumTradesOpeninLast12M,PercentInstallTrades,MSinceMostRecentInqexcl7days,NumInqLast6M,NumInqLast6Mexcl7days,NetFractionRevolvingBurden,NetFractionInstallBurden,NumRevolvingTradesWBalance,NumInstallTradesWBalance,NumBank2NatlTradesWHighUtilization,PercentTradesWBalance
Bad,55,144,4,84,20,3,0,83,2,3[0;32m,[0m[0;32m5[0m[0;32m,[0m[0;32m2[0m[0;32m3[0m[0;32m,[0m[0;31m2[0m[0;32m,[0m[0;31m0[0m[0;31m,[0m[0;31m1[0m[0;31m2[0m[0;32m,[0m[0;31m2[0m[0;32m,[0m[0;31m1[0m[0;32m,[0m[0;32m0[0m[0;31m.[0m[0;31m2[0m[0;31m5[0m[0;32m,[0m[0;31m0[0m[0;31m.[0m[0;31m1[0m[0;32m,[0m[0;

# Row completion test for different models and datasets (Table 2 in the paper)

In [6]:
# Run the row completion test for every (model, dataset) combination.
for model, chat_mode in models:
    print(model)
    # One LLM wrapper per model, backed by the shared Together `client`;
    # `chat_mode` comes from the second element of each `models` entry.
    llm = tabmemcheck.llm.OpenAILLM(client=client, model=model, chat_mode=chat_mode)
    for csv_file in csv_files:
        print(csv_file)
        # NOTE(review): presumably 8 preceding rows are given as context and 5 few-shot
        # examples are used — confirm against the tabmemcheck documentation.
        tabmemcheck.row_completion_test(csv_file, llm, num_prefix_rows=8, few_shot=5)
    # Visual separator between models in the cell output.
    print("="*120)

meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo
datasets/tabular/titanic-train.csv
[1mInfo: [0mAll the rows in the dataset are unique.
[0;32m7[0m[0;32m1[0m[0;32m3[0m[0;32m,[0m[0;31m0[0m[0;32m,[0m[0;31m3[0m[0;32m,[0m[0;32m"[0m[0;31mJ[0m[0;31mo[0m[0;31mh[0m[0;32ma[0m[0;31mn[0m[0;31ms[0m[0;31ms[0m[0;32mo[0m[0;31mn[0m[0;32m,[0m[0;32m [0m[0;32mM[0m[0;32mr[0m[0;32m.[0m[0;32m [0m[0;31mG[0m[0;31mu[0m[0;31ms[0m[0;31mt[0m[0;31ma[0m[0;31mf[0m[0;32m [0m[0;31mJ[0m[0;31mo[0m[0;31ma[0m[0;31mc[0m[0;31mh[0m[0;31mi[0m[0;31mm[0m[0;32m"[0m[0;32m,[0m[0;32mm[0m[0;32ma[0m[0;32ml[0m[0;32me[0m[0;32m,[0m[0;31m3[0m[0;31m3[0m[0;32m,[0m[0;31m0[0m[0;32m,[0m[0;32m0[0m[0;32m,[0m[0;31m3[0m[0;32m1[0m[0;31m0[0m[0;31m1[0m[0;31m2[0m[0;32m6[0m[0;31m4[0m[0;32m,[0m[0;31m8[0m[0;31m.[0m[0;31m6[0m[0;31m5[0m[0;31m4[0m[0;32m2[0m[0;31m,[0m[0;32m,[0m[0;32mS[0m
[0;32m1[0m[0;32m4[0m[0;32m7[0m

In [11]:
# Header test on iris.csv with default settings.
# Build the LLM explicitly instead of relying on the `llm` variable leaked from the
# model loops: `models[-1]` is the entry those loops finish on, so the result is the
# same after a full run, but the cell no longer depends on a loop having been executed.
model, chat_mode = models[-1]
llm = tabmemcheck.llm.OpenAILLM(client=client, model=model, chat_mode=chat_mode)
header_prompt, header_completion, response = tabmemcheck.header_test('datasets/tabular/iris.csv', llm)

[1mHeader Test: [0m[0;30msepal_length,sepal_width,petal_length,petal_width,species
5.1,3.5,1.4,0.2,Iris-setosa
4.9,3,1.4,0.2,Iris-setosa
4.7,3.2,1.3,0.2,Iris-setosa
4.6,3.1,1.5,0.2,Iris-setosa
5,3.6,1[0;32m.[0m[0;32m4[0m[0;32m,[0m[0;32m0[0m[0;32m.[0m[0;32m2[0m[0;32m,[0m[0;32mI[0m[0;32mr[0m[0;32mi[0m[0;32ms[0m[0;32m-[0m[0;32ms[0m[0;32me[0m[0;32mt[0m[0;32mo[0m[0;32ms[0m[0;32ma[0m[0;32m
[0m[0;32m5[0m[0;32m.[0m[0;32m4[0m[0;32m,[0m[0;32m3[0m[0;32m.[0m[0;32m9[0m[0;32m,[0m[0;32m1[0m[0;32m.[0m[0;32m7[0m[0;32m,[0m[0;32m0[0m[0;32m.[0m[0;32m4[0m[0;32m,[0m[0;32mI[0m[0;32mr[0m[0;32mi[0m[0;32ms[0m[0;32m-[0m[0;32ms[0m[0;32me[0m[0;32mt[0m[0;32mo[0m[0;32ms[0m[0;32ma[0m[0;32m
[0m[0;32m4[0m[0;32m.[0m[0;32m6[0m[0;32m,[0m[0;32m3[0m[0;32m.[0m[0;32m4[0m[0;32m,[0m[0;32m1[0m[0;32m.[0m[0;32m4[0m[0;32m,[0m[0;32m0[0m[0;32m.[0m[0;32m3[0m[0;32m,[0m[0;32mI[0m[0;32mr[0m[0;32mi[0m[0;3

In [5]:
# Turn on tabmemcheck's `print_responses` option so that responses are printed.
# NOTE(review): presumably this affects the raw LLM responses of all tests run afterwards — confirm in the tabmemcheck docs.
tabmemcheck.config.print_responses = True

In [4]:
# Header test on adult-train.csv. Here the model is passed by name ("gpt-4-0125-preview")
# rather than as an LLM object built on the Together client.
header_prompt, header_completion, response = tabmemcheck.header_test('datasets/tabular/adult-train.csv', "gpt-4-0125-preview")

[1mHeader Test: [0m[0;30mAge,WorkClass,fnlwgt,Education,EducationNum,MaritalStatus,Occupation,Relationship,Race,Gender,CapitalGain,CapitalLoss,HoursPerWeek,NativeCountry,Income
39, State-gov,77516, Bachelors,13, Never-married, Adm-clerical, Not-in-family, White, Male,2174,0,40, United-States, <=50K
50, Self-emp-not-inc,83311, Bachelors,13, Married-civ-spouse, Exec-managerial, Husband, White, Male,0,0,13, United-States, <=50K
38, Private,215646, HS-grad,9, Divorced, Handlers-cleaners, N[0;32mo[0m[0;32mt[0m[0;32m-[0m[0;32mi[0m[0;32mn[0m[0;32m-[0m[0;32mf[0m[0;32ma[0m[0;32mm[0m[0;32mi[0m[0;32ml[0m[0;32my[0m[0;32m,[0m[0;32m [0m[0;32mW[0m[0;32mh[0m[0;32mi[0m[0;32mt[0m[0;32me[0m[0;32m,[0m[0;32m [0m[0;32mM[0m[0;32ma[0m[0;32ml[0m[0;32me[0m[0;32m,[0m[0;32m0[0m[0;32m,[0m[0;32m0[0m[0;32m,[0m[0;32m4[0m[0;32m0[0m[0;32m,[0m[0;32m [0m[0;32mU[0m[0;32mn[0m[0;32mi[0m[0;32mt[0m[0;32me[0m[0;32md[0m[0;32m-[0m[0;32mS[0m[0

In [22]:
# Feature completion test on openml-diabetes.csv with num_queries=25. The returned
# feature values and model responses are kept for the per-value comparison printed below.
feature_values, responses = tabmemcheck.feature_completion_test('datasets/tabular/openml-diabetes.csv', "gpt-4-0125-preview", num_queries=25)

[1mInfo: [0mUsing feature DiabetesPedigreeFunction with 67.32% unique values.
[1mFeature Completion Test ("DiabetesPedigreeFunction"): [0m[0;30m22/25 exact matches.[0m


In [23]:
# Print one comparison line per (feature value, model response) pair.
# NOTE(review): levenshtein_cmd presumably renders a colour-coded character diff — confirm.
print('\n'.join(
    tabmemcheck.utils.levenshtein_cmd(value, completion)
    for value, completion in zip(feature_values, responses)
))

[0;32m0[0m[0;32m.[0m[0;32m6[0m[0;32m9[0m[0;32m6[0m
[0;32m0[0m[0;32m.[0m[0;32m1[0m[0;32m8[0m[0;32m7[0m
[0;32m0[0m[0;32m.[0m[0;32m3[0m[0;32m8[0m
[0;32m0[0m[0;32m.[0m[0;32m1[0m[0;32m2[0m[0;32m1[0m
[0;32m0[0m[0;32m.[0m[0;32m3[0m[0;32m4[0m[0;32m1[0m
[0;32m0[0m[0;32m.[0m[0;32m8[0m[0;32m4[0m[0;32m5[0m
[0;32m0[0m[0;32m.[0m[0;32m8[0m[0;32m5[0m[0;32m5[0m
[0;32m0[0m[0;32m.[0m[0;32m3[0m[0;32m0[0m[0;32m4[0m
[0;32m0[0m[0;32m.[0m[0;32m3[0m[0;32m3[0m[0;32m1[0m
[0;32m0[0m[0;32m.[0m[0;32m5[0m[0;32m3[0m[0;32m2[0m
[0;32m0[0m[0;32m.[0m[0;32m3[0m[0;32m1[0m[0;32m5[0m
[0;32m0[0m[0;32m.[0m[0;32m3[0m[0;32m7[0m[0;31m0[0m
[0;32m0[0m[0;32m.[0m[0;32m4[0m[0;32m6[0m[0;32m6[0m
[0;32m0[0m[0;32m.[0m[0;32m4[0m[0;32m9[0m[0;32m9[0m
[0;32m0[0m[0;32m.[0m[0;31m2[0m[0;31m3[0m[0;31m3[0m
[0;32m0[0m[0;32m.[0m[0;32m5[0m[0;32m5[0m[0;32m7[0m
[0;32m0[0m[0;32m.[0m[0;32m7[0m

In [17]:
# Feature completion test on adult-train.csv with num_queries=25. This overwrites
# `feature_values` and `responses` from the earlier diabetes run.
feature_values, responses = tabmemcheck.feature_completion_test('datasets/tabular/adult-train.csv', "gpt-4-0125-preview", num_queries=25)

[1mInfo: [0mUsing feature fnlwgt with 66.48% unique values.
[1mFeature Completion Test ("fnlwgt"): [0m[0;30m0/25 exact matches.[0m


In [21]:
# Print one comparison line per (feature value, model response) pair.
# NOTE(review): levenshtein_cmd presumably renders a colour-coded character diff — confirm.
print('\n'.join(
    tabmemcheck.utils.levenshtein_cmd(value, completion)
    for value, completion in zip(feature_values, responses)
))

[0;32m1[0m[0;32m9[0m[0;31m6[0m[0;31m6[0m[0;31m8[0m[0;31m9[0m
[0;32m1[0m[0;31m6[0m[0;31m8[0m[0;31m2[0m[0;31m9[0m[0;31m4[0m
[0;31m1[0m[0;31m3[0m[0;31m2[0m[0;31m2[0m[0;31m2[0m[0;31m2[0m
[0;32m1[0m[0;31m2[0m[0;31m2[0m[0;31m2[0m[0;31m7[0m[0;32m2[0m
[0;31m2[0m[0;31m3[0m[0;31m4[0m[0;31m7[0m[0;31m2[0m[0;31m1[0m
[0;32m2[0m[0;31m3[0m[0;31m4[0m[0;31m7[0m[0;31m2[0m[0;31m1[0m
[0;32m1[0m[0;31m6[0m[0;31m4[0m[0;31m5[0m[0;31m2[0m[0;31m6[0m
[0;31m2[0m[0;31m0[0m[0;31m9[0m[0;32m6[0m[0;31m4[0m[0;31m2[0m
[0;31m2[0m[0;31m0[0m[0;31m2[0m[0;31m6[0m[0;31m8[0m[0;32m3[0m
[0;35m5[0m[0;31m2[0m[0;32m3[0m[0;32m4[0m[0;31m7[0m[0;31m2[0m[0;31m1[0m
[0;32m1[0m[0;31m9[0m[0;31m3[0m[0;31m8[0m[0;31m8[0m[0;31m4[0m
[0;32m1[0m[0;31m9[0m[0;31m2[0m[0;31m7[0m[0;32m6[0m[0;31m2[0m
[0;32m1[0m[0;31m7[0m[0;31m6[0m[0;31m2[0m[0;31m6[0m[0;31m1[0m
[0;31m1[0m[0;32m8[0m[0;31m8[0m[0

In [10]:
# Header test on acs-income-2022.csv with the model passed by name ("gpt-4-0125-preview").
header_prompt, header_completion, response = tabmemcheck.header_test('datasets/tabular/acs-income-2022.csv', "gpt-4-0125-preview")

[1mHeader Test: [0m[0;30mAge,Class of worker,Educational attainment,Marital status,Occupation,Place of birth,Usual hours worked per week past 12 months,Sex,Recoded race,Income
26,"Employee of a private for-profit company or business, or of an individual, for wages, salary, or commissions","1 or more years of college credit, no degree",Never married or under 15 years old,Food Service Managers,Mexico,30,Female,Some Other Race alone,"Less than $50,000 per year."
38,Federal government employee,Regular high school diploma,Divorced,Photographers,Arizona/AZ,40,Female,White alone,"Less than $50,000 per year."
23,Federal government employee,Regular high school diploma,Never married or under 15 years old,Military Enlisted Tactical Operations And Air/Weapons Specialists And Crew Members,Montana/MT,40,Male,Two or More Races,"Less than $50,000 per year."
20,"Employee of a private for-profit company or business, or of an individual, for wages, salary, or commissions","1 or more years of college c

In [4]:
# Header test on titanic-train.csv with the model passed by name ("gpt-4-0125-preview").
# The return value is not assigned to any variable here.
tabmemcheck.header_test('datasets/tabular/titanic-train.csv', "gpt-4-0125-preview")

[1mHeader Test: [0m[0;30mPassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
1,0,3,"Braund, Mr. Owen Harris",male,22,1,0,A/5 21171,7.25,,S
2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Thayer)",female,38,1,0,PC 17599,71.2833,C85,C
3,1,3,"Heikkinen, Miss. Laina",female,26,0,0,STON/O2. 3101282,7.925,,S
4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35,1,0,113803,53.1,C123,S
5,0,3,"Allen, Mr. William Henry",male,35,0,0,373450,8.05,,S
6,0,3,"Moran, Mr. James",male,,0,0,330877,8.4583,,Q
7,0,1,"McCarthy, Mr.[0;32m Timothy J",male,54,0,0,17463,51.8625,E46,S
8,0,3,"Palsson, Master. Gosta Leonard",male,2,3,1,349909,21.075,,S
9,1,3,"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)",female,27,0,2,347742,11.1333,,S
10,1,2,"Nasser, Mrs. Nicholas (Adele Achem)",female,14,1,0,237736,30.0708,,C
11,1,3,"Sandstrom, Miss. Marguerite Rut",female,4,1,1,PP 9549,16.7,G6,S
12,1,1,"Bonnell, Miss. Elizabeth",female,58,0,0,113783,26.55,C103,S
13,0,3,"Saundercock