# Prepare tutorial dataset from ATLAS Open Data

In [1]:

#!pip install uproot
import uproot
import numpy as np
import pandas as pd
pd.set_option('display.max_columns', None) # to see all columns of df.head()
import time


In [2]:
#load signal, background data
# ATLAS 13 TeV simulation
#More info on dataset  : http://opendata.atlas.cern/release/2020/documentation/datasets/mc.html

nevents=200000  # limit number of events to treat
#nevents=1E10 

inputdatapath="/Users/rousseau/DocumentsLourds/OpenData/CERNOpenData/dataWW_ATLAS_openData13TeV/"
outputdatapath="/Users/rousseau/Downloads/"

#detaillevel="d0"
detaillevel="d1"


# chose one of the file to be filtered
outkey="qq"
outkey="ggH"
outkey="VBFH"

# http://opendata.cern.ch/eos/opendata/atlas/OutreachDatasets/2020-01-22/2lep/MC/mc_363492.llvv.2lep.root 
if outkey=="ggH":
    inputfilename=inputdatapath+"mc_345324.ggH125_WW2lep.2lep.root"
    outputfilename=outputdatapath+"dataWW_ggH"+"_"+detaillevel+".csv.gz" 
elif outkey=="VBFH":
    inputfilename=inputdatapath+"mc_345323.VBFH125_WW2lep.2lep.root"
    outputfilename=outputdatapath+"dataWW_VBFH"+"_"+detaillevel+".csv.gz" 

elif outkey=="qq":
    inputfilename=inputdatapath+"mc_363492.llvv.2lep.root"
    outputfilename=outputdatapath+"dataWW_qq"+"_"+detaillevel+".csv.gz" 


print ("Will read file:",inputfilename)
print ("Will write file",outputfilename)
tree = uproot.open(inputfilename)  


    
#http://opendata.cern.ch/eos/opendata/atlas/OutreachDatasets/2020-01-22/2lep/MC/mc_345323.VBFH125_WW2lep.2lep.root
#tree = uproot.open("dataWW/mc_161005.ggH125_WW2lep.root") ; outhdf="dataWW/dataWW_ggH.hdf"; outkey="ggH" # http://opendata.cern.ch/record/3825

#tree = uproot.open("dataWW/mc_161055.VBFH125_WW2lep.root"); outhdf="dataWW/dataWW_VBFH.hdf"; outkey="VBFH" # http://opendata.cern.ch/record/3826






Will read file: /Users/rousseau/DocumentsLourds/OpenData/CERNOpenData/dataWW_ATLAS_openData13TeV/mc_345323.VBFH125_WW2lep.2lep.root
Will write file /Users/rousseau/Downloads/dataWW_VBFH_d1.csv.gz


In [3]:
roottree=tree['mini']
roottree.show()
roottree.keys()
print ("Number of entries in the tree:",roottree.numentries)

runNumber                  (no streamer)              asdtype('>i4')
eventNumber                (no streamer)              asdtype('>i4')
channelNumber              (no streamer)              asdtype('>i4')
mcWeight                   (no streamer)              asdtype('>f4')
scaleFactor_PILEUP         (no streamer)              asdtype('>f4')
scaleFactor_ELE            (no streamer)              asdtype('>f4')
scaleFactor_MUON           (no streamer)              asdtype('>f4')
scaleFactor_PHOTON         (no streamer)              asdtype('>f4')
scaleFactor_TAU            (no streamer)              asdtype('>f4')
scaleFactor_BTAG           (no streamer)              asdtype('>f4')
scaleFactor_LepTRIGGER     (no streamer)              asdtype('>f4')
scaleFactor_PhotonTRIGGER  (no streamer)              asdtype('>f4')
trigE                      (no streamer)              asdtype('bool')
trigM                      (no streamer)              asdtype('bool')
trigP                      (no s

In [4]:
roottree.values

<bound method TTreeMethods.values of <TTree b'mini' at 0x0001164d50d0>>

In [5]:
#full list of variables http://opendata.atlas.cern/release/2020/documentation/datasets/dataset13.html

extract_vars_scalar=['runNumber', 'eventNumber', 'channelNumber', 'mcWeight',
       'scaleFactor_PILEUP', 'scaleFactor_ELE', 'scaleFactor_MUON',
       'scaleFactor_PHOTON', 'scaleFactor_TAU', 'scaleFactor_BTAG',
       'scaleFactor_LepTRIGGER', 'scaleFactor_PhotonTRIGGER', 'trigE', 'trigM',
       'trigP', 'lep_n', 'met_et',
       'met_phi', 'jet_n', 'photon_n',
       'tau_n', "ditau_m",  
       'met_et_syst',
       'XSection', 'SumWeights', 'largeRjet_n']

extract_vars_array=['lep_truthMatched', 'lep_trigMatched', 'lep_pt',
       'lep_eta', 'lep_phi', 'lep_E', 'lep_z0', 'lep_charge', 'lep_type',
       'lep_isTightID', 'lep_ptcone30', 'lep_etcone20',
       'lep_trackd0pvunbiased', 'lep_tracksigd0pvunbiased', 
        'jet_pt', 'jet_eta', 'jet_phi', 'jet_E', 'jet_jvt',
       'jet_trueflav', 'jet_truthMatched', 'jet_MV2c10', 
       'photon_truthMatched', 'photon_trigMatched', 'photon_pt', 'photon_eta',
       'photon_phi', 'photon_E', 'photon_isTightID', 'photon_ptcone30',
       'photon_etcone20', 'photon_convType', 'tau_pt', 'tau_eta',
       'tau_phi', 'tau_E', 'tau_isTightID', 'tau_truthMatched',
       'tau_trigMatched', 'tau_nTracks', 'tau_BDTid',  'lep_pt_syst',
        'jet_pt_syst', 'photon_pt_syst', 'tau_pt_syst',
       'largeRjet_pt',
       'largeRjet_eta', 'largeRjet_phi', 'largeRjet_E', 'largeRjet_m',
       'largeRjet_truthMatched', 'largeRjet_D2', 'largeRjet_tau32',
       'largeRjet_pt_syst', 'tau_charge']

# strict minimum just MET and the lepton 4 mom
if detaillevel=="d0":
    extract_vars_scalar=['runNumber', 'eventNumber', 'channelNumber', 'mcWeight',
        'lep_n', 'met_et',
       'met_phi',
       'XSection', 'SumWeights']
    extract_vars_array=['lep_pt',
       'lep_eta', 'lep_phi','lep_charge', 'lep_type']



if detaillevel=="d1":
    extract_vars_scalar=['runNumber', 'eventNumber', 'channelNumber', 'mcWeight',
        'lep_n', 'met_et',
       'met_phi','jet_n',
       'XSection', 'SumWeights']

    extract_vars_array=['lep_pt',
       'lep_eta', 'lep_phi','lep_E','lep_charge', 'lep_type',
       'jet_pt', 'jet_eta', 'jet_phi', 'jet_E']






extract_vars=extract_vars_scalar+extract_vars_array

In [6]:
startcpu = time.process_time()
startwallclock = time.time()


#few var to debug
#extract_vars=[  'runNumber',  'eventNumber',  'channelNumber', 'mcWeight','lep_pt','jet_pt']
#flatten=False mandatory if some variable size array
# FIXME uproot pandas.df is real slow. Should move to numpy reading.
df=roottree.pandas.df(extract_vars,flatten=False,entrystop=nevents) 
#df=roottree.pandas.df(extract_vars,flatten=False,entrystart=35000,entrystop=65000) 

#df.keys() # to have the list of variable
display(df.head())
display(df.describe)

print("CPU time : ", time.process_time() - startcpu)
print("Wall clock time ",time.time() - startwallclock)



Unnamed: 0_level_0,runNumber,eventNumber,channelNumber,mcWeight,lep_n,met_et,met_phi,jet_n,XSection,SumWeights,lep_pt,lep_eta,lep_phi,lep_E,lep_charge,lep_type,jet_pt,jet_eta,jet_phi,jet_E
entry,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
0,284500,384,345323,3.749917,2,116861.976562,-1.265381,2,1.905877,4389990.0,"[57986.78, 33692.395]","[-0.17389055, -0.87548834]","[-1.4897541, -1.6281896]","[58865.79, 47450.777]","[-1, 1]","[13, 11]","[29894.691, 29083.451]","[-2.4546714, 1.2145077]","[-2.4570606, 0.5750098]","[175423.42, 53506.61]"
1,284500,841,345323,3.749917,2,112019.65625,-0.868992,3,1.905877,4389990.0,"[29739.217, 12656.194]","[-1.0223949, 0.05334374]","[0.9359129, 0.62833863]","[46684.273, 12674.6455]","[1, -1]","[11, 13]","[99282.52, 32057.973, 21531.564]","[-2.3501935, 0.7605696, -1.1584153]","[3.121195, -1.5141237, 1.8501498]","[525478.25, 42347.35, 37972.08]"
2,284500,184,345323,3.749917,2,314240.40625,-0.37074,3,1.905877,4389990.0,"[42921.27, 18685.36]","[-0.8960725, -1.1407573]","[-0.552366, -1.6429868]","[61337.41, 32220.232]","[1, -1]","[13, 11]","[243037.17, 73487.56, 27080.62]","[1.2937653, -2.0674562, 2.209404]","[2.7463968, -2.9270153, 1.278851]","[476635.8, 295285.38, 124964.766]"
3,284500,817,345323,3.749917,2,72023.109375,-0.991769,2,1.905877,4389990.0,"[67622.86, 65901.164]","[0.9023578, 0.62570417]","[-0.0453637, -0.4605257]","[97073.4, 79227.984]","[-1, 1]","[13, 13]","[213954.38, 23830.812]","[2.499094, 2.4734304]","[2.9268243, 0.578938]","[1311011.4, 142503.3]"
4,284500,1588,345323,3.749917,2,4425.194824,-2.43295,0,1.905877,4389990.0,"[40542.984, 21005.13]","[0.4464862, 0.29136705]","[1.6497122, -0.47812402]","[44651.688, 21903.324]","[1, -1]","[11, 13]",[],[],[],[]


<bound method NDFrame.describe of         runNumber  eventNumber  channelNumber  mcWeight  lep_n         met_et  \
entry                                                                           
0          284500          384         345323  3.749917      2  116861.976562   
1          284500          841         345323  3.749917      2  112019.656250   
2          284500          184         345323  3.749917      2  314240.406250   
3          284500          817         345323  3.749917      2   72023.109375   
4          284500         1588         345323  3.749917      2    4425.194824   
...           ...          ...            ...       ...    ...            ...   
199995     284500       197164         345323  3.749917      2   27219.941406   
199996     284500       197053         345323  3.749917      2   34253.167969   
199997     284500       198078         345323  3.749917      2   77188.273438   
199998     284500       198428         345323  3.749917      2   79800.8750

CPU time :  3455.015268
Wall clock time  1760.8257808685303


In [7]:
#display(df.head(100))
allvars=df.columns.tolist() #output columns into list
#check we have correctly listed the scalar and vectors
if (sorted(extract_vars)!=sorted(allvars)):
    print ("Explicit list :", sorted(extract_vars))
    print ("Complete list :", sorted(allvars))

    
    raise Exception("some missing variables! Please fix extract_vars_scalar or extract_vars_array.")

In [8]:
# now keep only the first two elements of each array, with variable name like lep_pt_0 lpt_pt_1
dfflat=df.drop(columns=extract_vars_array)
display(dfflat.head())
#extract_vars_array_debug=["jet_pt","photon_pt"] #truthMatched"] # for debugging

for var in extract_vars_array:
    # print ("treating var=",var)
    dff=pd.DataFrame( df[var].values.tolist())
    #display(dff.head())


    for i in range (2,len(dff.columns)):
        dff=dff.drop(columns=[i]) # only keep first two columns
        
    # if necessary complete up to two columns
    if len(dff.columns)<2:
        dff["add0"]=np.nan
    
    if len(dff.columns)<2:
        dff["add1"]=np.nan

        
    colunames=[]
    for i in range(2):
        colunames+=[var+"_"+str(i)]
    dff.columns=colunames #give names to the two elements

    dfflat=pd.concat([dfflat,dff],axis=1) # merge in


dfflat.head()


Unnamed: 0_level_0,runNumber,eventNumber,channelNumber,mcWeight,lep_n,met_et,met_phi,jet_n,XSection,SumWeights
entry,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,284500,384,345323,3.749917,2,116861.976562,-1.265381,2,1.905877,4389990.0
1,284500,841,345323,3.749917,2,112019.65625,-0.868992,3,1.905877,4389990.0
2,284500,184,345323,3.749917,2,314240.40625,-0.37074,3,1.905877,4389990.0
3,284500,817,345323,3.749917,2,72023.109375,-0.991769,2,1.905877,4389990.0
4,284500,1588,345323,3.749917,2,4425.194824,-2.43295,0,1.905877,4389990.0


Unnamed: 0,runNumber,eventNumber,channelNumber,mcWeight,lep_n,met_et,met_phi,jet_n,XSection,SumWeights,lep_pt_0,lep_pt_1,lep_eta_0,lep_eta_1,lep_phi_0,lep_phi_1,lep_E_0,lep_E_1,lep_charge_0,lep_charge_1,lep_type_0,lep_type_1,jet_pt_0,jet_pt_1,jet_eta_0,jet_eta_1,jet_phi_0,jet_phi_1,jet_E_0,jet_E_1
0,284500,384,345323,3.749917,2,116861.976562,-1.265381,2,1.905877,4389990.0,57986.78125,33692.394531,-0.173891,-0.875488,-1.489754,-1.62819,58865.789062,47450.777344,-1,1,13,11,29894.691406,29083.451172,-2.454671,1.214508,-2.457061,0.57501,175423.4,53506.609375
1,284500,841,345323,3.749917,2,112019.65625,-0.868992,3,1.905877,4389990.0,29739.216797,12656.194336,-1.022395,0.053344,0.935913,0.628339,46684.273438,12674.645508,1,-1,11,13,99282.523438,32057.972656,-2.350194,0.76057,3.121195,-1.514124,525478.2,42347.351562
2,284500,184,345323,3.749917,2,314240.40625,-0.37074,3,1.905877,4389990.0,42921.269531,18685.359375,-0.896073,-1.140757,-0.552366,-1.642987,61337.410156,32220.232422,1,-1,13,11,243037.171875,73487.5625,1.293765,-2.067456,2.746397,-2.927015,476635.8,295285.375
3,284500,817,345323,3.749917,2,72023.109375,-0.991769,2,1.905877,4389990.0,67622.859375,65901.164062,0.902358,0.625704,-0.045364,-0.460526,97073.398438,79227.984375,-1,1,13,13,213954.375,23830.8125,2.499094,2.47343,2.926824,0.578938,1311011.0,142503.296875
4,284500,1588,345323,3.749917,2,4425.194824,-2.43295,0,1.905877,4389990.0,40542.984375,21005.130859,0.446486,0.291367,1.649712,-0.478124,44651.6875,21903.324219,1,-1,11,13,,,,,,,,


In [9]:
# now put some columns at the end to improve visibility

varlist=dfflat.columns.tolist()
for aname in ["mcWeight","runNumber","XSection","SumWeights"]:
    if not aname in varlist:
        continue
    varlist.remove(aname)
    varlist.append(aname)
    
    #varlist.insert(varlist.index("channelNumber")+1,aname)

vnames=[]
for s in varlist: # find all the array number e.g. lep_n
    i=s.find("_n")
    if i>=0 and i==len(s)-2:
        vnames+=[s]

#print(vnames)

for vname in vnames:
    varlist.remove(vname) # remove e.g. lep_n
    aname=vname[:-2]
    found=False
    for i,s in enumerate(varlist): # finds the first e.g. lep_blah
        if s.startswith(aname) :
            found=True
            break
    if found:
        varlist.insert(i,vname) # insert e.g. lep_n before lep_blah


# check we have only reordered
if sorted(dfflat.columns.tolist())!=sorted(varlist):
    print ("before:",sorted(dfflat.columns.tolist()) )
    print ("after :",sorted(varlist) )
    raise Exception("something wrong when reordering the columns!")

# do the reordering
dfflat = dfflat.reindex(columns=varlist)
display(dfflat.head())
display(dfflat.describe())



Unnamed: 0,eventNumber,channelNumber,met_et,met_phi,lep_n,lep_pt_0,lep_pt_1,lep_eta_0,lep_eta_1,lep_phi_0,lep_phi_1,lep_E_0,lep_E_1,lep_charge_0,lep_charge_1,lep_type_0,lep_type_1,jet_n,jet_pt_0,jet_pt_1,jet_eta_0,jet_eta_1,jet_phi_0,jet_phi_1,jet_E_0,jet_E_1,mcWeight,runNumber,XSection,SumWeights
0,384,345323,116861.976562,-1.265381,2,57986.78125,33692.394531,-0.173891,-0.875488,-1.489754,-1.62819,58865.789062,47450.777344,-1,1,13,11,2,29894.691406,29083.451172,-2.454671,1.214508,-2.457061,0.57501,175423.4,53506.609375,3.749917,284500,1.905877,4389990.0
1,841,345323,112019.65625,-0.868992,2,29739.216797,12656.194336,-1.022395,0.053344,0.935913,0.628339,46684.273438,12674.645508,1,-1,11,13,3,99282.523438,32057.972656,-2.350194,0.76057,3.121195,-1.514124,525478.2,42347.351562,3.749917,284500,1.905877,4389990.0
2,184,345323,314240.40625,-0.37074,2,42921.269531,18685.359375,-0.896073,-1.140757,-0.552366,-1.642987,61337.410156,32220.232422,1,-1,13,11,3,243037.171875,73487.5625,1.293765,-2.067456,2.746397,-2.927015,476635.8,295285.375,3.749917,284500,1.905877,4389990.0
3,817,345323,72023.109375,-0.991769,2,67622.859375,65901.164062,0.902358,0.625704,-0.045364,-0.460526,97073.398438,79227.984375,-1,1,13,13,2,213954.375,23830.8125,2.499094,2.47343,2.926824,0.578938,1311011.0,142503.296875,3.749917,284500,1.905877,4389990.0
4,1588,345323,4425.194824,-2.43295,2,40542.984375,21005.130859,0.446486,0.291367,1.649712,-0.478124,44651.6875,21903.324219,1,-1,11,13,0,,,,,,,,,3.749917,284500,1.905877,4389990.0


Unnamed: 0,eventNumber,channelNumber,met_et,met_phi,lep_n,lep_pt_0,lep_pt_1,lep_eta_0,lep_eta_1,lep_phi_0,lep_phi_1,lep_E_0,lep_E_1,lep_charge_0,lep_charge_1,lep_type_0,lep_type_1,jet_n,jet_pt_0,jet_pt_1,jet_eta_0,jet_eta_1,jet_phi_0,jet_phi_1,jet_E_0,jet_E_1,mcWeight,runNumber,XSection,SumWeights
count,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,158858.0,71974.0,158858.0,71974.0,158858.0,71974.0,158858.0,71974.0,200000.0,200000.0,200000.0,200000.0
mean,561836.8,345323.0,61254.44,-0.006217,2.00605,57741.07,25521.282278,0.002152,0.002656,0.00253,0.00904,105659.6,48120.910743,-0.0027,0.0034,11.90871,11.96192,1.32638,91050.82,45860.138548,0.006987,0.003688,-0.006843,0.00513,236105.2,126262.4,3.737287,284500.0,1.90624,4390892.0
std,320847.4,0.0,45102.86,1.812713,0.077996,33593.58,15738.557564,1.158605,1.18789,1.810775,1.814271,84092.79,42408.648203,0.999999,0.999997,0.995827,0.999277,1.058495,68277.79,31418.373602,1.533908,1.572302,1.814658,1.812236,232053.4,119901.8,0.308322,0.0,0.000364,902.0276
min,2.0,345323.0,95.62187,-3.141569,2.0,25000.52,7000.291992,-2.696373,-2.699926,-3.141549,-3.141536,25022.67,7003.763184,-1.0,-1.0,11.0,11.0,0.0,20001.21,20000.445312,-2.499998,-2.499821,-3.141566,-3.14157,20259.17,20202.7,-3.749917,284500.0,1.905877,4389990.0
25%,327700.8,345323.0,33251.7,-1.576492,2.0,37054.54,14427.725586,-0.848496,-0.892411,-1.567547,-1.563081,52350.55,21771.405762,-1.0,-1.0,11.0,11.0,1.0,44187.42,25255.976562,-1.396042,-1.437151,-1.581503,-1.568343,87803.52,48794.66,3.749917,284500.0,1.905877,4389990.0
50%,501445.5,345323.0,51911.54,-0.018328,2.0,49256.17,22259.882812,0.004078,0.006082,0.014238,0.017808,79275.65,35129.060547,-1.0,1.0,11.0,11.0,1.0,73058.82,34536.884766,0.013694,0.004739,-0.009425,0.018331,160776.2,89635.75,3.749917,284500.0,1.905877,4389990.0
75%,769740.2,345323.0,76906.27,1.560394,2.0,68386.8,31861.653809,0.855856,0.897754,1.570543,1.577133,129504.5,58666.938477,1.0,1.0,13.0,13.0,2.0,115963.7,55887.931641,1.4139,1.448666,1.565018,1.568228,300298.3,156426.2,3.749917,284500.0,1.905877,4389990.0
max,1199997.0,345323.0,3634596.0,3.141551,4.0,3624608.0,382168.28125,2.699811,2.699214,3.141534,3.141544,3624717.0,870747.625,1.0,1.0,13.0,13.0,10.0,1658162.0,586593.1875,2.499952,2.499937,3.141559,3.141583,4006936.0,2388901.0,3.749917,284500.0,1.905877,4389990.0


In [10]:
# convert momentum and energy to GeV
for togev in ["met_et","lep_pt_0","lep_pt_1","jet_pt_0","jet_pt_1"]:
    if togev in dfflat.keys():
        dfflat[togev]/=1000.


In [11]:
dfflat.fillna(-7,inplace=True) # replace NaN by unphysical real value
dfflat.head()
print ("number of events",len(dfflat.index))
print ("total weight of these events",dfflat.mcWeight.sum())
print ("channelNumber",dfflat["channelNumber"][0])
print ("SumWeights",dfflat["SumWeights"][0])
print ("XSection",dfflat["XSection"][0])

number of events 200000
total weight of these events 747440.75
channelNumber 345323
SumWeights 4389990.0
XSection 1.9058765


In [12]:
dfflat.describe()

Unnamed: 0,eventNumber,channelNumber,met_et,met_phi,lep_n,lep_pt_0,lep_pt_1,lep_eta_0,lep_eta_1,lep_phi_0,lep_phi_1,lep_E_0,lep_E_1,lep_charge_0,lep_charge_1,lep_type_0,lep_type_1,jet_n,jet_pt_0,jet_pt_1,jet_eta_0,jet_eta_1,jet_phi_0,jet_phi_1,jet_E_0,jet_E_1,mcWeight,runNumber,XSection,SumWeights
count,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0
mean,561836.8,345323.0,61.25496,-0.006217,2.00605,57.741066,25.521282,0.002152,0.002656,0.00253,0.00904,105659.6,48120.910743,-0.0027,0.0034,11.90871,11.96192,1.32638,70.880787,12.022778,-1.434421,-4.479583,-1.445405,-4.479064,187534.6,45433.56,3.737287,284500.0,1.90624,4390892.0
std,320847.4,0.0,45.104282,1.812713,0.077996,33.59358,15.738558,1.158605,1.18789,1.810775,1.814271,84092.79,42408.648203,0.999999,0.999997,0.995827,0.999277,1.058495,72.620436,31.605592,3.145023,3.491334,3.256724,3.533596,227773.0,94055.99,0.308322,0.0,0.000364,902.0276
min,2.0,345323.0,0.095622,-3.141569,2.0,25.000521,7.000292,-2.696373,-2.699926,-3.141549,-3.141536,25022.67,7003.763184,-1.0,-1.0,11.0,11.0,0.0,-7.0,-7.0,-7.0,-7.0,-7.0,-7.0,-7.0,-7.0,-3.749917,284500.0,1.905877,4389990.0
25%,327700.8,345323.0,33.251699,-1.576492,2.0,37.05454,14.427726,-0.848496,-0.892411,-1.567547,-1.563081,52350.55,21771.405762,-1.0,-1.0,11.0,11.0,1.0,24.793918,-7.0,-2.275491,-7.0,-2.790456,-7.0,40119.67,-7.0,3.749917,284500.0,1.905877,4389990.0
50%,501445.5,345323.0,51.911537,-0.018328,2.0,49.25617,22.259883,0.004078,0.006082,0.014238,0.017808,79275.65,35129.060547,-1.0,1.0,11.0,11.0,1.0,57.474021,-7.0,-0.771714,-7.0,-0.828875,-7.0,119368.4,-7.0,3.749917,284500.0,1.905877,4389990.0
75%,769740.2,345323.0,76.906273,1.560394,2.0,68.386799,31.861654,0.855856,0.897754,1.570543,1.577133,129504.5,58666.938477,1.0,1.0,13.0,13.0,2.0,101.699098,26.742459,1.089902,-1.168697,1.157947,-1.226929,251430.5,57233.28,3.749917,284500.0,1.905877,4389990.0
max,1199997.0,345323.0,3634.59575,3.141551,4.0,3624.6075,382.168281,2.699811,2.699214,3.141534,3.141544,3624717.0,870747.625,1.0,1.0,13.0,13.0,10.0,1658.162,586.593187,2.499952,2.499937,3.141559,3.141583,4006936.0,2388901.0,3.749917,284500.0,1.905877,4389990.0


In [13]:
dfflat.to_csv(outputfilename,float_format='%.5g',index = False)# do csv file