scala> dfMuons.show(10,false) 19/05/07 15:03:25 WARN Utils: Truncated the string representation of a plan since it was too large. This behavior can be adjusted by setting 'spark.debug.maxToStringFields' in SparkEnv.conf. +---------+-----------+-------------+--------+------+----------+------------------+---------------+----------------+----------------+-------------------+-----------------+-------------------+-----+-----+-------+-------------+-----+----------------+---------------+-----------+-------------+------------+-----------+--------------+----------+--------+-----------+------------+------------+---------------------+------------------------+---------+----------+-----+--------+-----------+-----------+-----------+-----------+----------+-----------+------------+----------------+-------+-------------+ |runNumber|eventNumber|channelNumber|mcWeight|pvxp_n|vxp_z |scaleFactor_PILEUP|scaleFactor_ELE|scaleFactor_MUON|scaleFactor_BTAG|scaleFactor_TRIGGER|scaleFactor_JVFSF|scaleFactor_ZVERTEX|trigE|trigM|passGRL|hasGoodVertex|lep_n|lep_truthMatched|lep_trigMatched|lep_pt |lep_eta |lep_phi |lep_E |lep_z0 |lep_charge|lep_type|lep_flag |lep_ptcone30|lep_etcone20|lep_trackd0pvunbiased|lep_tracksigd0pvunbiased|met_et |met_phi |jet_n|alljet_n|jet_pt |jet_eta |jet_phi |jet_E |jet_m |jet_jvf |jet_trueflav|jet_truthMatched|jet_SV0|jet_MV1 | +---------+-----------+-------------+--------+------+----------+------------------+---------------+----------------+----------------+-------------------+-----------------+-------------------+-----+-----+-------+-------------+-----+----------------+---------------+-----------+-------------+------------+-----------+--------------+----------+--------+-----------+------------+------------+---------------------+------------------------+---------+----------+-----+--------+-----------+-----------+-----------+-----------+----------+-----------+------------+----------------+-------+-------------+ |207490 |17281852 |207490 |0.0 |15 |-12.316585|0.0 |0.0 
|0.0 |0.0 |0.0 |0.0 |0.0 |false|true |true |true |1 |[false] |[3] |[40531.855]|[0.288244] |[1.3469992] |[42227.465]|[-0.045446984]|[-1.0] |[13] |[568344575]|[0.0] |[94.18325] |[-0.04912882] |[0.0152232405] |94215.1 |-1.3943559|0 |0 |[] |[] |[] |[] |[] |[] |[] |[] |[] |[] | |207490 |17282007 |207490 |0.0 |15 |22.651913 |0.0 |0.0 |0.0 |0.0 |0.0 |0.0 |0.0 |false|true |true |true |1 |[false] |[3] |[37172.49] |[-0.21478392]|[-2.1380057]|[38033.36] |[0.22578493] |[1.0] |[13] |[568344575]|[0.0] |[65.88673] |[-0.032381147] |[0.025347514] |30354.057|0.3259549 |1 |1 |[27929.969]|[-2.170531]|[1.42328] |[124052.25]|[4573.633]|[0.9523457]|[-99] |[0] |[0.0] |[0.055551887]| |207490 |17282941 |207490 |0.0 |14 |67.00033 |0.0 |0.0 |0.0 |0.0 |0.0 |0.0 |0.0 |false|true |true |true |1 |[false] |[3] |[41404.363]|[-1.2001014] |[1.3576007] |[74975.45] |[-0.030548852]|[1.0] |[13] |[568344575]|[0.0] |[628.7983] |[-0.0059319637] |[0.018147442] |54632.633|-2.053428 |0 |0 |[] |[] |[] |[] |[] |[] |[] |[] |[] |[] | |207490 |17283582 |207490 |0.0 |14 |25.114586 |0.0 |0.0 |0.0 |0.0 |0.0 |0.0 |0.0 |false|true |true |true |1 |[false] |[3] |[36330.36] |[1.6244663] |[-0.0825191]|[95780.08] |[0.016617686] |[1.0] |[13] |[568344575]|[0.0] |[78.632385] |[-0.012363502] |[0.017884906] |18974.707|2.3157902 |0 |0 |[] |[] |[] |[] |[] |[] |[] |[] |[] |[] | |207490 |17284798 |207490 |0.0 |13 |5.419942 |0.0 |0.0 |0.0 |0.0 |0.0 |0.0 |0.0 |false|true |true |true |1 |[false] |[1] |[29865.918]|[1.9333806] |[-2.166267] |[105389.39]|[-0.008121626]|[1.0] |[13] |[568344575]|[0.0] |[228.17558] |[-0.0022841152] |[0.017784366] |18013.09 |0.86960316|0 |0 |[] |[] |[] |[] |[] |[] |[] |[] |[] |[] | |207490 |17285935 |207490 |0.0 |5 |-29.495798|0.0 |0.0 |0.0 |0.0 |0.0 |0.0 |0.0 |false|true |true |true |1 |[false] |[3] |[39808.71] |[-2.2543075] |[-0.8124007] ... 
207490 |17288279 |207490 |0.0 |13 |-34.4249 |0.0 |0.0 |0.0 |0.0 |0.0 |0.0 |0.0 |false|true |true |true |1 |[false] |[1] |[32593.084]|[2.394099] |[-2.8295686]|[180069.89]|[-0.04895938] |[1.0] |[13] |[568344575]|[0.0] |[408.78912] |[0.03346716] |[0.02250342] |26812.076|0.6027184 |0 |0 |[] |[] |[] |[] |[] |[] |[] |[] |[] |[] | |207490 |17286783 |207490 |0.0 |9 |-43.56932 |0.0 |0.0 |0.0 |0.0 |0.0 |0.0 |0.0 |false|true |true |true |1 |[false] |[1] |[32656.75] |[-2.0470903] |[0.6496125] |[128576.88]|[0.06338643] |[1.0] |[13] |[568344575]|[0.0] |[-1059.2249]|[-0.0042003784] |[0.01684502] |56296.965|-2.1432886|0 |0 |[] |[] |[] |[] |[] |[] |[] |[] |[] |[] | +---------+-----------+-------------+--------+------+----------+------------------+---------------+----------------+----------------+-------------------+-----------------+-------------------+-----+-----+-------+-------------+-----+----------------+---------------+-----------+-------------+------------+-----------+--------------+----------+--------+-----------+------------+------------+---------------------+------------------------+---------+----------+-----+--------+-----------+-----------+-----------+-----------+----------+-----------+------------+----------------+-------+-------------+ only showing top 10 rows scala> dfMuons.write.parquet("dfMuons.parquet") scala> val muonsParquetFile = dfMuons.write.parquet("dfMuons.parquet") org.apache.spark.sql.AnalysisException: path file:/Users/aironman/dfMuons.parquet already exists.; at org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand.run(InsertIntoHadoopFsRelationCommand.scala:114) at org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult$lzycompute(commands.scala:104) at org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult(commands.scala:102) at org.apache.spark.sql.execution.command.DataWritingCommandExec.doExecute(commands.scala:122) at 
org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:131) at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:127) at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:155) at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:152) at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:127) at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:80) at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:80) at org.apache.spark.sql.DataFrameWriter$$anonfun$runCommand$1.apply(DataFrameWriter.scala:668) at org.apache.spark.sql.DataFrameWriter$$anonfun$runCommand$1.apply(DataFrameWriter.scala:668) at org.apache.spark.sql.execution.SQLExecution$$anonfun$withNewExecutionId$1.apply(SQLExecution.scala:78) at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:125) at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:73) at org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:668) at org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:276) at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:270) at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala) ... [stack trace truncated] scala> val gammaParquetFile = dfGamma.write.parquet("dfGammam.parquet") gammaParquetFile: Unit = () scala> dfMuons.count res2: Long = 7028084 scala> dfGamma.count res3: Long = 7917590 scala>