In [30]:
import pandas as pd
pd.options.display.max_columns=1000
pd.options.display.width=200
pd.options.display.min_rows=60
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix
from sklearn import preprocessing

from datetime import datetime

In [52]:
df =  pd.read_csv('clinvar_conflicting.csv')
df.head()

Unnamed: 0,CHROM,POS,REF,ALT,AF_ESP,AF_EXAC,AF_TGP,CLNDISDB,CLNDISDBINCL,CLNDN,CLNDNINCL,CLNHGVS,CLNSIGINCL,CLNVC,CLNVI,MC,ORIGIN,SSR,CLASS,Allele,Consequence,IMPACT,SYMBOL,Feature_type,Feature,BIOTYPE,EXON,INTRON,cDNA_position,CDS_position,Protein_position,Amino_acids,Codons,DISTANCE,STRAND,BAM_EDIT,SIFT,PolyPhen,MOTIF_NAME,MOTIF_POS,HIGH_INF_POS,MOTIF_SCORE_CHANGE,LoFtool,CADD_PHRED,CADD_RAW,BLOSUM62
0,1,955563,G,C,0.0,0.0,0.0,"MedGen:C3808739,OMIM:615120|MedGen:CN169374",,"Myasthenic_syndrome,_congenital,_8|not_specified",,NC_000001.10:g.955563G>C,,single_nucleotide_variant,,SO:0001583|missense_variant,1.0,,0,C,missense_variant,MODERATE,AGRN,Transcript,NM_001305275.1,protein_coding,1/39,,61,11,4,R/P,cGg/cCg,,1.0,,,,,,,,0.421,11.39,1.133255,-2.0
1,1,955597,G,T,0.0,0.42418,0.2826,MedGen:CN169374,,not_specified,,NC_000001.10:g.955597G>T,,single_nucleotide_variant,,SO:0001819|synonymous_variant,1.0,,0,T,synonymous_variant,LOW,AGRN,Transcript,NM_001305275.1,protein_coding,1/39,,95,45,15,P,ccG/ccT,,1.0,,,,,,,,0.421,8.15,0.599088,
2,1,955619,G,C,0.0,0.03475,0.0088,"MedGen:C3808739,OMIM:615120|MedGen:CN169374",,"Myasthenic_syndrome,_congenital,_8|not_specified",,NC_000001.10:g.955619G>C,,single_nucleotide_variant,,SO:0001583|missense_variant,1.0,,1,C,missense_variant,MODERATE,AGRN,Transcript,NM_001305275.1,protein_coding,1/39,,117,67,23,V/L,Gtc/Ctc,,1.0,,,,,,,,0.421,3.288,0.069819,1.0
3,1,957640,C,T,0.0318,0.02016,0.0328,"MedGen:C3808739,OMIM:615120|MedGen:CN169374",,"Myasthenic_syndrome,_congenital,_8|not_specified",,NC_000001.10:g.957640C>T,,single_nucleotide_variant,,SO:0001819|synonymous_variant,1.0,,0,T,synonymous_variant,LOW,AGRN,Transcript,NM_001305275.1,protein_coding,2/39,,311,261,87,D,gaC/gaT,,1.0,,,,,,,,0.421,12.56,1.356499,
4,1,976059,C,T,0.0,0.00022,0.001,MedGen:CN169374,,not_specified,,NC_000001.10:g.976059C>T,,single_nucleotide_variant,,SO:0001819|synonymous_variant,1.0,,1,T,synonymous_variant,LOW,AGRN,Transcript,NM_001305275.1,protein_coding,4/39,,576,526,176,L,Ctg/Ttg,,1.0,,,,,,,,0.421,17.74,2.234711,


In [53]:
#select only columns with int or float data types
df = df.select_dtypes(['number'])
#drop any columns with null values
df.dropna(axis=1,inplace=True)
var_class = df.pop('CLASS')

Unnamed: 0,POS,AF_ESP,AF_EXAC,AF_TGP,CLASS
0,955563,0.0,0.0,0.0,0
1,955597,0.0,0.42418,0.2826,0
2,955619,0.0,0.03475,0.0088,1
3,957640,0.0318,0.02016,0.0328,0
4,976059,0.0,0.00022,0.001,1


In [5]:
def compare_dicts(a,b,ignore=['test_score','train_score']):
    a = dict(a)
    b = dict(b)
    for k in ignore:
        a.pop(k,None)
        b.pop(k,None)
        
    return tuple(a.items()) == tuple(b.items())

In [15]:
hyperparam_table = []

In [20]:
exists = any([compare_dicts(a,b={'test_size': 0.05, 
                                'random_state': 0, 
                                'data_size': str(df.shape),
                                'scaling':'no_scaling'}) 
              for a in hyperparam_table])

logisticRegr = LogisticRegression()
if not exists:
    x_train,x_test,y_train,y_test = train_test_split(df,var_class,test_size=0.05,random_state=0)
    hyperparam_table += [{'test_size': 0.05, 
                          'random_state': 0, 
                          'data_size': str(df.shape),
                          'scaling':'no_scaling'}]
    
    logisticRegr.fit(x_train,y_train)
    
    predictions_test = logisticRegr.predict(x_test)
    predictions_train = logisticRegr.predict(x_train)
    
    score = logisticRegr.score(x_test,y_test)
    hyperparam_table[-1]['test_score'] = score
    
    training_score = logisticRegr.score(x_train,y_train)
    hyperparam_table[-1]['train_score'] = training_score
    
    tn, fp, fn, tp = confusion_matrix(y_test,predictions_test).ravel()
    hyperparam_table[-1]['tn'] = tn
    hyperparam_table[-1]['fp'] = fp
    hyperparam_table[-1]['fn'] = fn
    hyperparam_table[-1]['tp'] = tp
    
    
df_hyp = pd.DataFrame(hyperparam_table)



In [21]:
df_hyp.head(10)

Unnamed: 0,test_size,random_state,data_size,scaling,test_score,train_score,tn,fp,fn,tp
0,0.01,0,"(65188, 4)",no_scaling,0.740798,0.74797,483,0,169,0
1,0.1,0,"(65188, 4)",no_scaling,0.745053,0.748215,4857,0,1662,0
2,0.05,0,"(65188, 4)",no_scaling,0.742945,0.748159,2422,0,838,0


In [48]:
#Now, I will build the hyperparameter table with different csv files at different stages of data cleaning and feature engineering,
#to reveal which feature engineering and data cleaning steps are improving model performance
#Additionally, I am distinguishing between min_max scaling and no scaling

In [24]:
df1 =  pd.read_csv('data_cleaned1.csv')
#select only columns with int or float data types
df1 = df1.select_dtypes(['number'])
#drop any columns with null values
df1.dropna(axis=1,inplace=True)
#create a series for the binary result being predicted
var_class = df1.pop('CLASS')

exists = any([compare_dicts(a,b={'test_size': 0.1, 
                                'random_state': 0, 
                                'data_size': str(df.shape),
                                'scaling':'no_scaling'}) 
              for a in hyperparam_table])

logisticRegr = LogisticRegression()
if not exists:
    x_train,x_test,y_train,y_test = train_test_split(df,var_class,test_size=0.1,random_state=0)
    hyperparam_table += [{'test_size': 0.1, 
                          'random_state': 0, 
                          'data_size': str(df.shape),
                          'scaling':'no_scaling'}]
    
    logisticRegr.fit(x_train,y_train)
    
    predictions_test = logisticRegr.predict(x_test)
    predictions_train = logisticRegr.predict(x_train)
    
    score = logisticRegr.score(x_test,y_test)
    hyperparam_table[-1]['test_score'] = score
    
    training_score = logisticRegr.score(x_train,y_train)
    hyperparam_table[-1]['train_score'] = training_score
    
    tn, fp, fn, tp = confusion_matrix(y_test,predictions_test).ravel()
    hyperparam_table[-1]['tn'] = tn
    hyperparam_table[-1]['fp'] = fp
    hyperparam_table[-1]['fn'] = fn
    hyperparam_table[-1]['tp'] = tp
    
    
df_hyp = pd.DataFrame(hyperparam_table)



In [25]:
df_hyp.head()

Unnamed: 0,test_size,random_state,data_size,scaling,test_score,train_score,tn,fp,fn,tp
0,0.01,0,"(65188, 4)",no_scaling,0.740798,0.74797,483,0,169,0
1,0.1,0,"(65188, 4)",no_scaling,0.745053,0.748215,4857,0,1662,0
2,0.05,0,"(65188, 4)",no_scaling,0.742945,0.748159,2422,0,838,0
3,0.05,0,"(65188, 4)",no_scaling,0.742945,0.748159,2422,0,838,0
4,0.1,0,"(65188, 4)",no_scaling,0.745053,0.748215,4857,0,1662,0


In [26]:
df = pd.read_csv('clinvar_conflicting.csv')
df = df.select_dtypes(['number'])
df.dropna(axis=1,inplace=True)
mask = df['CLASS'] == 1
df_minority = df[mask]
df_majority = df[~mask]
df_minority = df_minority.sample(len(df_majority),replace=True)
df = pd.concat([df_minority,df_majority])
var_class = df.pop('CLASS')

scaler = preprocessing.MinMaxScaler()
scaled_df = scaler.fit_transform(df)
scaled_df = pd.DataFrame(scaled_df,columns=df.columns)


exists = any([compare_dicts(a,b={'test_size': 0.05, 
                                'random_state': 0, 
                                'data_size': str(df.shape),
                                'scaling':'min_max'}) 
              for a in hyperparam_table])

if not exists:
    x_train,x_test,y_train,y_test = train_test_split(scaled_df,var_class,test_size=0.05,random_state=0)
    hyperparam_table += [{'test_size': 0.05, 
                          'random_state': 0, 
                          'data_size': str(df.shape),
                          'scaling':'min_max'}]
    
    logisticRegr.fit(x_train,y_train)
    
    predictions_test = logisticRegr.predict(x_test)
    predictions_train = logisticRegr.predict(x_train)
    
    score = logisticRegr.score(x_test,y_test)
    hyperparam_table[-1]['test_score'] = score
    
    training_score = logisticRegr.score(x_train,y_train)
    hyperparam_table[-1]['train_score'] = training_score
    
    tn, fp, fn, tp = confusion_matrix(y_test,predictions_test).ravel()
    hyperparam_table[-1]['tn'] = tn
    hyperparam_table[-1]['fp'] = fp
    hyperparam_table[-1]['fn'] = fn
    hyperparam_table[-1]['tp'] = tp

  interactivity=interactivity, compiler=compiler, result=result)


In [27]:
df = pd.read_csv('data_cleaned1.csv')
df = df.select_dtypes(['number'])
df.dropna(axis=1,inplace=True)
mask = df['CLASS'] == 1
df_minority = df[mask]
df_majority = df[~mask]
df_minority = df_minority.sample(len(df_majority),replace=True)
df = pd.concat([df_minority,df_majority])
var_class = df.pop('CLASS')

scaler = preprocessing.MinMaxScaler()
scaled_df = scaler.fit_transform(df)
scaled_df = pd.DataFrame(scaled_df,columns=df.columns)


exists = any([compare_dicts(a,b={'test_size': 0.05, 
                                'random_state': 0, 
                                'data_size': str(df.shape),
                                'scaling':'min_max'}) 
              for a in hyperparam_table])

if not exists:
    x_train,x_test,y_train,y_test = train_test_split(scaled_df,var_class,test_size=0.05,random_state=0)
    hyperparam_table += [{'test_size': 0.05, 
                          'random_state': 0, 
                          'data_size': str(df.shape),
                          'scaling':'min_max'}]
    
    logisticRegr.fit(x_train,y_train)
    
    predictions_test = logisticRegr.predict(x_test)
    predictions_train = logisticRegr.predict(x_train)
    
    score = logisticRegr.score(x_test,y_test)
    hyperparam_table[-1]['test_score'] = score
    
    training_score = logisticRegr.score(x_train,y_train)
    hyperparam_table[-1]['train_score'] = training_score
    
    tn, fp, fn, tp = confusion_matrix(y_test,predictions_test).ravel()
    hyperparam_table[-1]['tn'] = tn
    hyperparam_table[-1]['fp'] = fp
    hyperparam_table[-1]['fn'] = fn
    hyperparam_table[-1]['tp'] = tp

  interactivity=interactivity, compiler=compiler, result=result)


In [29]:
df_hyp = pd.DataFrame(hyperparam_table)
df_hyp.head(10)

Unnamed: 0,test_size,random_state,data_size,scaling,test_score,train_score,tn,fp,fn,tp
0,0.01,0,"(65188, 4)",no_scaling,0.740798,0.74797,483,0,169,0
1,0.1,0,"(65188, 4)",no_scaling,0.745053,0.748215,4857,0,1662,0
2,0.05,0,"(65188, 4)",no_scaling,0.742945,0.748159,2422,0,838,0
3,0.05,0,"(65188, 4)",no_scaling,0.742945,0.748159,2422,0,838,0
4,0.1,0,"(65188, 4)",no_scaling,0.745053,0.748215,4857,0,1662,0
5,0.05,0,"(97508, 4)",min_max,0.582855,0.572534,510,1880,154,2332
6,0.05,0,"(97508, 44)",min_max,0.584085,0.584755,1036,1354,674,1812


In [39]:
df = pd.read_csv('data_cleaned1.csv')
df = df.select_dtypes(['number'])
df.dropna(axis=1,inplace=True)
var_class = df.pop('CLASS')

scaler = preprocessing.MinMaxScaler()
scaled_df = scaler.fit_transform(df)
scaled_df = pd.DataFrame(scaled_df,columns=df.columns)


exists = any([compare_dicts(a,b={'test_size': 0.05, 
                                'random_state': 0, 
                                'data_size': str(df.shape),
                                'scaling':'min_max'}) 
              for a in hyperparam_table])

if not exists:
    x_train,x_test,y_train,y_test = train_test_split(scaled_df,var_class,test_size=0.05,random_state=0)
    hyperparam_table += [{'test_size': 0.05, 
                          'random_state': 0, 
                          'data_size': str(df.shape),
                          'scaling':'min_max'}]
    
    logisticRegr.fit(x_train,y_train)
    
    predictions_test = logisticRegr.predict(x_test)
    predictions_train = logisticRegr.predict(x_train)
    
    score = logisticRegr.score(x_test,y_test)
    hyperparam_table[-1]['test_score'] = score
    
    training_score = logisticRegr.score(x_train,y_train)
    hyperparam_table[-1]['train_score'] = training_score
    
    tn, fp, fn, tp = confusion_matrix(y_test,predictions_test).ravel()
    hyperparam_table[-1]['tn'] = tn
    hyperparam_table[-1]['fp'] = fp
    hyperparam_table[-1]['fn'] = fn
    hyperparam_table[-1]['tp'] = tp

  interactivity=interactivity, compiler=compiler, result=result)


In [40]:
df_hyp = pd.DataFrame(hyperparam_table)
df_hyp.head(10)

Unnamed: 0,test_size,random_state,data_size,scaling,test_score,train_score,tn,fp,fn,tp
0,0.01,0,"(65188, 4)",no_scaling,0.740798,0.74797,483,0,169,0
1,0.1,0,"(65188, 4)",no_scaling,0.745053,0.748215,4857,0,1662,0
2,0.05,0,"(65188, 4)",no_scaling,0.742945,0.748159,2422,0,838,0
3,0.05,0,"(65188, 4)",no_scaling,0.742945,0.748159,2422,0,838,0
4,0.1,0,"(65188, 4)",no_scaling,0.745053,0.748215,4857,0,1662,0
5,0.05,0,"(97508, 4)",min_max,0.582855,0.572534,510,1880,154,2332
6,0.05,0,"(97508, 44)",min_max,0.584085,0.584755,1036,1354,674,1812
7,0.05,0,"(65188, 44)",min_max,0.743252,0.74824,2422,0,837,1


In [41]:
df = pd.read_csv('data_cleaned1.csv')
df = df.select_dtypes(['number'])
df.dropna(axis=1,inplace=True)
mask = df['CLASS'] == 1
df_minority = df[mask]
df_majority = df[~mask]
df_minority = df_minority.sample(len(df_majority),replace=True)
df = pd.concat([df_minority,df_majority])
var_class = df.pop('CLASS')




exists = any([compare_dicts(a,b={'test_size': 0.05, 
                                'random_state': 0, 
                                'data_size': str(df.shape),
                                'scaling':'no_scaling'}) 
              for a in hyperparam_table])

if not exists:
    x_train,x_test,y_train,y_test = train_test_split(df,var_class,test_size=0.05,random_state=0)
    hyperparam_table += [{'test_size': 0.05, 
                          'random_state': 0, 
                          'data_size': str(df.shape),
                          'scaling':'no_scaling'}]
    
    logisticRegr.fit(x_train,y_train)
    
    predictions_test = logisticRegr.predict(x_test)
    predictions_train = logisticRegr.predict(x_train)
    
    score = logisticRegr.score(x_test,y_test)
    hyperparam_table[-1]['test_score'] = score
    
    training_score = logisticRegr.score(x_train,y_train)
    hyperparam_table[-1]['train_score'] = training_score
    
    tn, fp, fn, tp = confusion_matrix(y_test,predictions_test).ravel()
    hyperparam_table[-1]['tn'] = tn
    hyperparam_table[-1]['fp'] = fp
    hyperparam_table[-1]['fn'] = fn
    hyperparam_table[-1]['tp'] = tp

  interactivity=interactivity, compiler=compiler, result=result)


In [42]:
df_hyp = pd.DataFrame(hyperparam_table)
df_hyp.head(10)

Unnamed: 0,test_size,random_state,data_size,scaling,test_score,train_score,tn,fp,fn,tp
0,0.01,0,"(65188, 4)",no_scaling,0.740798,0.74797,483,0,169,0
1,0.1,0,"(65188, 4)",no_scaling,0.745053,0.748215,4857,0,1662,0
2,0.05,0,"(65188, 4)",no_scaling,0.742945,0.748159,2422,0,838,0
3,0.05,0,"(65188, 4)",no_scaling,0.742945,0.748159,2422,0,838,0
4,0.1,0,"(65188, 4)",no_scaling,0.745053,0.748215,4857,0,1662,0
5,0.05,0,"(97508, 4)",min_max,0.582855,0.572534,510,1880,154,2332
6,0.05,0,"(97508, 44)",min_max,0.584085,0.584755,1036,1354,674,1812
7,0.05,0,"(65188, 44)",min_max,0.743252,0.74824,2422,0,837,1
8,0.05,0,"(97508, 44)",no_scaling,0.509844,0.499482,0,2390,0,2486


In [43]:
df = pd.read_csv('data_cleanednlp2.csv')
df = df.select_dtypes(['number'])
df.dropna(axis=1,inplace=True)
mask = df['CLASS'] == 1
df_minority = df[mask]
df_majority = df[~mask]
df_minority = df_minority.sample(len(df_majority),replace=True)
df = pd.concat([df_minority,df_majority])
var_class = df.pop('CLASS')

scaler = preprocessing.MinMaxScaler()
scaled_df = scaler.fit_transform(df)
scaled_df = pd.DataFrame(scaled_df,columns=df.columns)


exists = any([compare_dicts(a,b={'test_size': 0.05, 
                                'random_state': 0, 
                                'data_size': str(df.shape),
                                'scaling':'min_max'}) 
              for a in hyperparam_table])

if not exists:
    x_train,x_test,y_train,y_test = train_test_split(scaled_df,var_class,test_size=0.05,random_state=0)
    hyperparam_table += [{'test_size': 0.05, 
                          'random_state': 0, 
                          'data_size': str(df.shape),
                          'scaling':'min_max'}]
    
    logisticRegr.fit(x_train,y_train)
    
    predictions_test = logisticRegr.predict(x_test)
    predictions_train = logisticRegr.predict(x_train)
    
    score = logisticRegr.score(x_test,y_test)
    hyperparam_table[-1]['test_score'] = score
    
    training_score = logisticRegr.score(x_train,y_train)
    hyperparam_table[-1]['train_score'] = training_score
    
    tn, fp, fn, tp = confusion_matrix(y_test,predictions_test).ravel()
    hyperparam_table[-1]['tn'] = tn
    hyperparam_table[-1]['fp'] = fp
    hyperparam_table[-1]['fn'] = fn
    hyperparam_table[-1]['tp'] = tp

  interactivity=interactivity, compiler=compiler, result=result)


In [44]:
df_hyp = pd.DataFrame(hyperparam_table)
df_hyp.head(10)

Unnamed: 0,test_size,random_state,data_size,scaling,test_score,train_score,tn,fp,fn,tp
0,0.01,0,"(65188, 4)",no_scaling,0.740798,0.74797,483,0,169,0
1,0.1,0,"(65188, 4)",no_scaling,0.745053,0.748215,4857,0,1662,0
2,0.05,0,"(65188, 4)",no_scaling,0.742945,0.748159,2422,0,838,0
3,0.05,0,"(65188, 4)",no_scaling,0.742945,0.748159,2422,0,838,0
4,0.1,0,"(65188, 4)",no_scaling,0.745053,0.748215,4857,0,1662,0
5,0.05,0,"(97508, 4)",min_max,0.582855,0.572534,510,1880,154,2332
6,0.05,0,"(97508, 44)",min_max,0.584085,0.584755,1036,1354,674,1812
7,0.05,0,"(65188, 44)",min_max,0.743252,0.74824,2422,0,837,1
8,0.05,0,"(97508, 44)",no_scaling,0.509844,0.499482,0,2390,0,2486
9,0.05,0,"(97508, 56)",min_max,0.589212,0.591275,1132,1258,745,1741


In [45]:
df = pd.read_csv('data_cleaned3.csv')
df = df.select_dtypes(['number'])
df.dropna(axis=1,inplace=True)
mask = df['CLASS'] == 1
df_minority = df[mask]
df_majority = df[~mask]
df_minority = df_minority.sample(len(df_majority),replace=True)
df = pd.concat([df_minority,df_majority])
var_class = df.pop('CLASS')

scaler = preprocessing.MinMaxScaler()
scaled_df = scaler.fit_transform(df)
scaled_df = pd.DataFrame(scaled_df,columns=df.columns)


exists = any([compare_dicts(a,b={'test_size': 0.05, 
                                'random_state': 0, 
                                'data_size': str(df.shape),
                                'scaling':'min_max'}) 
              for a in hyperparam_table])

if not exists:
    x_train,x_test,y_train,y_test = train_test_split(scaled_df,var_class,test_size=0.05,random_state=0)
    hyperparam_table += [{'test_size': 0.05, 
                          'random_state': 0, 
                          'data_size': str(df.shape),
                          'scaling':'min_max'}]
    
    logisticRegr.fit(x_train,y_train)
    
    predictions_test = logisticRegr.predict(x_test)
    predictions_train = logisticRegr.predict(x_train)
    
    score = logisticRegr.score(x_test,y_test)
    hyperparam_table[-1]['test_score'] = score
    
    training_score = logisticRegr.score(x_train,y_train)
    hyperparam_table[-1]['train_score'] = training_score
    
    tn, fp, fn, tp = confusion_matrix(y_test,predictions_test).ravel()
    hyperparam_table[-1]['tn'] = tn
    hyperparam_table[-1]['fp'] = fp
    hyperparam_table[-1]['fn'] = fn
    hyperparam_table[-1]['tp'] = tp

  interactivity=interactivity, compiler=compiler, result=result)


In [46]:
df_hyp = pd.DataFrame(hyperparam_table)
df_hyp.head(15)

Unnamed: 0,test_size,random_state,data_size,scaling,test_score,train_score,tn,fp,fn,tp
0,0.01,0,"(65188, 4)",no_scaling,0.740798,0.74797,483,0,169,0
1,0.1,0,"(65188, 4)",no_scaling,0.745053,0.748215,4857,0,1662,0
2,0.05,0,"(65188, 4)",no_scaling,0.742945,0.748159,2422,0,838,0
3,0.05,0,"(65188, 4)",no_scaling,0.742945,0.748159,2422,0,838,0
4,0.1,0,"(65188, 4)",no_scaling,0.745053,0.748215,4857,0,1662,0
5,0.05,0,"(97508, 4)",min_max,0.582855,0.572534,510,1880,154,2332
6,0.05,0,"(97508, 44)",min_max,0.584085,0.584755,1036,1354,674,1812
7,0.05,0,"(65188, 44)",min_max,0.743252,0.74824,2422,0,837,1
8,0.05,0,"(97508, 44)",no_scaling,0.509844,0.499482,0,2390,0,2486
9,0.05,0,"(97508, 56)",min_max,0.589212,0.591275,1132,1258,745,1741


In [47]:
time = datetime.now().strftime("%Y%m%d_%H:%M:%S")
filename = 'hyperparameter_table'+time+'.csv'
df_hyp.to_csv(filename,index=False)