<a href="https://colab.research.google.com/github/harnalashok/hadoop/blob/main/hadoop_install_on_Colab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Last amended: 30th March, 2021
# Myfolder: github/hadoop
# Objective:
#            i)  Install hadoop on colab
#                (current version is 3.2.2)
#            ii) Experiments with hadoop
#           iii) Install spark on colab
#            iv) Access hadoop file from spark
#             v) Install koalas on colab
#
#
# Java 8 install: https://stackoverflow.com/a/58191107
# Hadoop install: https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-common/SingleCluster.html
# Spark install:  https://stackoverflow.com/a/64183749

## Install hadoop

### Define functions
No downloads. Just function definitions

In [9]:
# 1.0 Imports: os (to set environment variables) and time (to time the install)
import os  
import time  

#### ssh_install()

In [10]:
# 2.0 Function to install ssh client and sshd (Server)
def ssh_install():
  print("\n--1. Download and install ssh server----\n")
  ! sudo apt-get remove openssh-client openssh-server
  ! sudo apt install openssh-client openssh-server
  
  print("\n--2. Restart ssh server----\n")
  ! service ssh restart

#### Java install

In [11]:
# 3.0 Function to download and install java 8
def install_java():
  """Install OpenJDK 8 (headless), make it the active java/javac and expose
  it under /usr/java/latest.

  Also sets JAVA_HOME for the notebook process. Takes no arguments and
  returns nothing.
  """
  # Start from a clean /usr/java so the link/rename steps below do not collide
  # with leftovers from an earlier run.
  ! rm -rf /usr/java

  print("\n--Download and install Java 8----\n")
  !apt-get install -y openjdk-8-jdk-headless -qq > /dev/null        # install openjdk
  os.environ["JAVA_HOME"] = "/usr/lib/jvm/java-8-openjdk-amd64"     # set environment variable

  # Select the Java 8 binaries as the system-wide java and javac.
  !update-alternatives --set java /usr/lib/jvm/java-8-openjdk-amd64/jre/bin/java
  !update-alternatives --set javac /usr/lib/jvm/java-8-openjdk-amd64/bin/javac
  
  # /usr/java exists as a directory when ln runs, so the symlink is created
  # inside it (as /usr/java/java-8-openjdk-amd64); mv then renames that link
  # to /usr/java/latest.
  !mkdir -p /usr/java
  ! ln -s "/usr/lib/jvm/java-8-openjdk-amd64"  "/usr/java"
  ! mv "/usr/java/java-8-openjdk-amd64"  "/usr/java/latest"
  
  !java -version       #check java version
  !javac -version

#### hadoop install

In [12]:
# 4.0 Function to download and install hadoop
def hadoop_install():
  print("\n--5. Download hadoop tar.gz----\n")
  ! wget -c https://mirrors.estointernet.in/apache/hadoop/common/hadoop-3.2.2/hadoop-3.2.2.tar.gz

  print("\n--6. Transfer downloaded content and unzip tar.gz----\n")
  !  mv /content/hadoop*   /opt/
  ! tar -xzvf /opt/hadoop-3.2.2.tar.gz  --directory /opt/

  print("\n--7. Create hadoop folder----\n")
  ! rm -r /app/hadoop/tmp
  ! mkdir  -p   /app/hadoop/tmp
  
  print("\n--8. Check folder for files----\n")
  ! ls -la /opt

#### hadoop config

In [13]:
# 5.0 Function for setting hadoop configuration
def hadoop_config():
  """Rewrite the hadoop 3.2.2 site files and extend hadoop-env.sh.

  For core-site.xml, yarn-site.xml, mapred-site.xml and hdfs-site.xml the
  current content is printed, the file is rebuilt line by line with echo,
  and the new content is printed. hadoop-env.sh is printed, extended with
  export lines, and printed again. Takes no arguments and returns nothing.
  """
  print("\n--Begin Configuring hadoop---\n")
  print("\n=============================\n")
  print("\n--9. core-site.xml----\n")
  ! cat  /opt/hadoop-3.2.2/etc/hadoop/core-site.xml

  print("\n--10. Amend core-site.xml----\n")
  # The first echo uses '>' and so replaces the file; every later echo appends with '>>'.
  # Properties written: fs.defaultFS = hdfs://localhost:9000, hadoop.tmp.dir = /app/hadoop/tmp.
  !  echo  '<?xml version="1.0" encoding="UTF-8"?>' >  /opt/hadoop-3.2.2/etc/hadoop/core-site.xml
  !  echo  '<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>' >>  /opt/hadoop-3.2.2/etc/hadoop/core-site.xml
  !  echo  ' <configuration>' >>  /opt/hadoop-3.2.2/etc/hadoop/core-site.xml
  !  echo  '    <property>' >>  /opt/hadoop-3.2.2/etc/hadoop/core-site.xml
  !  echo  '        <name>fs.defaultFS</name>' >>  /opt/hadoop-3.2.2/etc/hadoop/core-site.xml
  !  echo  '        <value>hdfs://localhost:9000</value>' >>  /opt/hadoop-3.2.2/etc/hadoop/core-site.xml
  !  echo  '    </property>' >>  /opt/hadoop-3.2.2/etc/hadoop/core-site.xml
  !  echo  '    <property>' >>  /opt/hadoop-3.2.2/etc/hadoop/core-site.xml
  !  echo  '          <name>hadoop.tmp.dir</name>' >>  /opt/hadoop-3.2.2/etc/hadoop/core-site.xml
  !  echo  '          <value>/app/hadoop/tmp</value>' >>  /opt/hadoop-3.2.2/etc/hadoop/core-site.xml
  !  echo  '       <description>A base for other temporary directories.</description>' >>  /opt/hadoop-3.2.2/etc/hadoop/core-site.xml
  !  echo  '     </property>' >>  /opt/hadoop-3.2.2/etc/hadoop/core-site.xml
  !  echo  '  </configuration>' >>  /opt/hadoop-3.2.2/etc/hadoop/core-site.xml

  print("\n--11. Amended core-site.xml----\n")
  ! cat  /opt/hadoop-3.2.2/etc/hadoop/core-site.xml

  print("\n--12. yarn-site.xml----\n")
  !cat /opt/hadoop-3.2.2/etc/hadoop/yarn-site.xml

  # Rebuild yarn-site.xml: aux-services = mapreduce_shuffle, and the
  # nodemanager virtual-memory check is switched off (vmem-check-enabled = false).
  !echo '<?xml version="1.0" encoding="UTF-8"?>' > /opt/hadoop-3.2.2/etc/hadoop/yarn-site.xml
  !echo '<configuration>' >> /opt/hadoop-3.2.2/etc/hadoop/yarn-site.xml
  !echo '    <property>' >> /opt/hadoop-3.2.2/etc/hadoop/yarn-site.xml
  !echo '        <name>yarn.nodemanager.aux-services</name>' >> /opt/hadoop-3.2.2/etc/hadoop/yarn-site.xml
  !echo '        <value>mapreduce_shuffle</value>' >> /opt/hadoop-3.2.2/etc/hadoop/yarn-site.xml
  !echo '    </property>' >> /opt/hadoop-3.2.2/etc/hadoop/yarn-site.xml
  !echo '    <property>'  >> /opt/hadoop-3.2.2/etc/hadoop/yarn-site.xml
  !echo '       <name>yarn.nodemanager.vmem-check-enabled</name>'  >> /opt/hadoop-3.2.2/etc/hadoop/yarn-site.xml
  !echo '       <value>false</value>'  >> /opt/hadoop-3.2.2/etc/hadoop/yarn-site.xml
  !echo '    </property>'  >> /opt/hadoop-3.2.2/etc/hadoop/yarn-site.xml
  !echo ' </configuration>'  >> /opt/hadoop-3.2.2/etc/hadoop/yarn-site.xml
  
  print("\n--13. Amended yarn-site.xml----\n")
  !cat /opt/hadoop-3.2.2/etc/hadoop/yarn-site.xml

  print("\n--14. mapred-site.xml----\n")
  !cat  /opt/hadoop-3.2.2/etc/hadoop/mapred-site.xml

  print("\n--15. Amend mapred-site.xml----\n")
  # Rebuild mapred-site.xml: framework = yarn, and HADOOP_MAPRED_HOME=${HADOOP_HOME}
  # for the application master, map and reduce environments.
  # NOTE(review): the single quotes keep the shell from expanding ${HADOOP_HOME};
  # confirm the notebook's own {...} interpolation also leaves it untouched.
  !echo '<?xml version="1.0"?>'  > /opt/hadoop-3.2.2/etc/hadoop/mapred-site.xml
  !echo '<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>'  >> /opt/hadoop-3.2.2/etc/hadoop/mapred-site.xml
  !echo '<configuration>'  >> /opt/hadoop-3.2.2/etc/hadoop/mapred-site.xml
  !echo '    <property>'  >> /opt/hadoop-3.2.2/etc/hadoop/mapred-site.xml
  !echo '       <name>mapreduce.framework.name</name>'  >> /opt/hadoop-3.2.2/etc/hadoop/mapred-site.xml
  !echo '        <value>yarn</value>'  >> /opt/hadoop-3.2.2/etc/hadoop/mapred-site.xml
  !echo '    </property>'  >> /opt/hadoop-3.2.2/etc/hadoop/mapred-site.xml
  !echo '    <property>'  >> /opt/hadoop-3.2.2/etc/hadoop/mapred-site.xml
  !echo '        <name>yarn.app.mapreduce.am.env</name>'  >> /opt/hadoop-3.2.2/etc/hadoop/mapred-site.xml
  !echo '        <value>HADOOP_MAPRED_HOME=${HADOOP_HOME}</value>'  >> /opt/hadoop-3.2.2/etc/hadoop/mapred-site.xml
  !echo '    </property>'  >> /opt/hadoop-3.2.2/etc/hadoop/mapred-site.xml
  !echo '    <property>'  >> /opt/hadoop-3.2.2/etc/hadoop/mapred-site.xml
  !echo '       <name>mapreduce.map.env</name>'  >> /opt/hadoop-3.2.2/etc/hadoop/mapred-site.xml
  !echo '       <value>HADOOP_MAPRED_HOME=${HADOOP_HOME}</value>'  >> /opt/hadoop-3.2.2/etc/hadoop/mapred-site.xml
  !echo '    </property>'  >> /opt/hadoop-3.2.2/etc/hadoop/mapred-site.xml
  !echo '    <property>'  >> /opt/hadoop-3.2.2/etc/hadoop/mapred-site.xml
  !echo '      <name>mapreduce.reduce.env</name>'  >> /opt/hadoop-3.2.2/etc/hadoop/mapred-site.xml
  !echo '      <value>HADOOP_MAPRED_HOME=${HADOOP_HOME}</value>'  >> /opt/hadoop-3.2.2/etc/hadoop/mapred-site.xml
  !echo '   </property>'  >> /opt/hadoop-3.2.2/etc/hadoop/mapred-site.xml
  !echo '</configuration>'  >> /opt/hadoop-3.2.2/etc/hadoop/mapred-site.xml

  print("\n--16, Amended mapred-site.xml----\n")
  !cat  /opt/hadoop-3.2.2/etc/hadoop/mapred-site.xml

  print("\n---17. hdfs-site.xml----\n")
  !cat  /opt/hadoop-3.2.2/etc/hadoop/hdfs-site.xml
  
  print("\n---18. Amend hdfs-site.xml----\n")
  # Rebuild hdfs-site.xml: dfs.replication = 1 and dfs.block.size = 16777216
  # (16 * 1024 * 1024 bytes).
  !echo  '<?xml version="1.0" encoding="UTF-8"?> '   > /opt/hadoop-3.2.2/etc/hadoop/hdfs-site.xml
  !echo  '<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>' >> /opt/hadoop-3.2.2/etc/hadoop/hdfs-site.xml
  !echo  '<configuration>'  >> /opt/hadoop-3.2.2/etc/hadoop/hdfs-site.xml
  !echo  '    <property>'  >> /opt/hadoop-3.2.2/etc/hadoop/hdfs-site.xml
  !echo  '        <name>dfs.replication</name>'  >> /opt/hadoop-3.2.2/etc/hadoop/hdfs-site.xml
  !echo  '        <value>1</value>'  >> /opt/hadoop-3.2.2/etc/hadoop/hdfs-site.xml
  !echo  '    </property>'  >> /opt/hadoop-3.2.2/etc/hadoop/hdfs-site.xml
  !echo  '   <property>'   >> /opt/hadoop-3.2.2/etc/hadoop/hdfs-site.xml
  !echo  '        <name>dfs.block.size</name>'  >> /opt/hadoop-3.2.2/etc/hadoop/hdfs-site.xml
  !echo  '        <value>16777216</value>'  >> /opt/hadoop-3.2.2/etc/hadoop/hdfs-site.xml
  !echo  '        <description>Block size</description>'  >> /opt/hadoop-3.2.2/etc/hadoop/hdfs-site.xml
  !echo  '  </property>'  >> /opt/hadoop-3.2.2/etc/hadoop/hdfs-site.xml
  !echo  '</configuration>'  >> /opt/hadoop-3.2.2/etc/hadoop/hdfs-site.xml

  print("\n---19. Amended hdfs-site.xml----\n")
  !cat  /opt/hadoop-3.2.2/etc/hadoop/hdfs-site.xml

  print("\n---20. hadoop-env.sh----\n")
  # https://stackoverflow.com/a/53140448
  !cat /opt/hadoop-3.2.2/etc/hadoop/hadoop-env.sh
  # Unlike the XML files, hadoop-env.sh is only appended to ('>>' on every line),
  # so each call of this function adds another copy of these exports.
  # They set JAVA_HOME and name root as the user for the HDFS and YARN daemons.
  ! echo 'export JAVA_HOME="/usr/lib/jvm/java-8-openjdk-amd64"' >> /opt/hadoop-3.2.2/etc/hadoop/hadoop-env.sh
  ! echo 'export HDFS_NAMENODE_USER="root"'  >> /opt/hadoop-3.2.2/etc/hadoop/hadoop-env.sh
  ! echo 'export HDFS_DATANODE_USER="root"'  >> /opt/hadoop-3.2.2/etc/hadoop/hadoop-env.sh
  ! echo 'export HDFS_SECONDARYNAMENODE_USER="root"'  >> /opt/hadoop-3.2.2/etc/hadoop/hadoop-env.sh
  ! echo 'export YARN_RESOURCEMANAGER_USER="root"'  >> /opt/hadoop-3.2.2/etc/hadoop/hadoop-env.sh
  ! echo 'export YARN_NODEMANAGER_USER="root"'  >> /opt/hadoop-3.2.2/etc/hadoop/hadoop-env.sh
  
  print("\n---21. Amended hadoop-env.sh----\n")
  !cat /opt/hadoop-3.2.2/etc/hadoop/hadoop-env.sh


#### ssh keys

In [14]:
# 6.0 Function tp setup ssh passphrase
def set_keys():
  print("\n---22. Generate SSH keys----\n")
  ! cd ~ ; pwd 
  ! cd ~ ; ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa
  ! cd ~ ; cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
  ! cd ~ ; chmod 0600 ~/.ssh/authorized_keys


#### Set environment

In [15]:
# 7.0 Function to set up environmental variables
def set_env():
  """Set the Java and hadoop environment variables for the notebook process.

  JAVA_HOME, JRE_HOME, HADOOP_HOME and HADOOP_CONF_DIR are overwritten.
  LD_LIBRARY_PATH and PATH are extended with the hadoop directories; a
  variable that is not set yet is created, and a directory that is already
  listed is not added again, so the function can be called repeatedly.
  Takes no arguments and returns nothing.
  """
  print("\n---23. Set Environment variables----\n")
  # 'export' command does not work in colab
  # https://stackoverflow.com/a/57240319
  os.environ["JAVA_HOME"] = "/usr/lib/jvm/java-8-openjdk-amd64"     #set environment variable
  os.environ["JRE_HOME"] = "/usr/lib/jvm/java-8-openjdk-amd64/jre"
  os.environ["HADOOP_HOME"] = "/opt/hadoop-3.2.2"
  os.environ["HADOOP_CONF_DIR"] = "/opt/hadoop-3.2.2/etc/hadoop"

  def _append_paths(var, new_entries):
    # Append each entry to the ':'-separated variable `var` unless it is
    # already listed. `os.environ.get` avoids the KeyError that `+=` raises
    # when the variable does not exist. The existing value is kept verbatim.
    value = os.environ.get(var, "")
    listed = value.split(":")
    for entry in new_entries:
      if entry not in listed:
        value = f"{value}:{entry}" if value else entry
        listed.append(entry)
    os.environ[var] = value

  _append_paths("LD_LIBRARY_PATH", ["/opt/hadoop-3.2.2/lib/native"])
  _append_paths("PATH", ["/opt/hadoop-3.2.2/bin", "/opt/hadoop-3.2.2/sbin"])

#### Install all function

In [16]:
# 8.0 Function to call all functions
def install_hadoop():
  """Run the complete hadoop setup, one step after the other.

  Order: ssh install, Java install, hadoop download/unpack, hadoop
  configuration, ssh key setup, environment variables. Takes no arguments
  and returns nothing.
  """
  # The banner names everything this function does; ssh is installed first,
  # not java.
  print("\n--Install ssh, java and hadoop----\n")
  ssh_install()
  install_java()
  hadoop_install()
  hadoop_config()
  set_keys()
  set_env()


### Begin install
Download, install and configure everything. This takes around 2 minutes.

In [17]:
# 9.0 Start installation
# Time the whole install and report the duration in minutes.
start = time.time()
install_hadoop()
end = time.time()
elapsed_minutes = (end - start) / 60
print("\n---Time taken----\n")
print(elapsed_minutes)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
hadoop-3.2.2/share/doc/hadoop/hadoop-project-dist/hadoop-common/build/source/hadoop-common-project/hadoop-common/target/api/org/apache/hadoop/fs/class-use/AbstractFileSystem.html
hadoop-3.2.2/share/doc/hadoop/hadoop-project-dist/hadoop-common/build/source/hadoop-common-project/hadoop-common/target/api/org/apache/hadoop/fs/class-use/InvalidPathException.html
hadoop-3.2.2/share/doc/hadoop/hadoop-project-dist/hadoop-common/build/source/hadoop-common-project/hadoop-common/target/api/org/apache/hadoop/fs/class-use/Options.CreateOpts.ReplicationFactor.html
hadoop-3.2.2/share/doc/hadoop/hadoop-project-dist/hadoop-common/build/source/hadoop-common-project/hadoop-common/target/api/org/apache/hadoop/fs/class-use/ZeroCopyUnavailableException.html
hadoop-3.2.2/share/doc/hadoop/hadoop-project-dist/hadoop-common/build/source/hadoop-common-project/hadoop-common/target/api/org/apache/hadoop/fs/class-use/Options.CreateOpts.Perms.html
hado

### Format hadoop

In [18]:
# 10.0 Format hadoop
print("\n---24. Format namenode----\n")
!hdfs namenode  -format


---24. Format namenode----

2021-03-30 00:13:42,199 INFO namenode.NameNode: STARTUP_MSG: 
/************************************************************
STARTUP_MSG: Starting NameNode
STARTUP_MSG:   host = 5c96357b6223/172.28.0.2
STARTUP_MSG:   args = [-format]
STARTUP_MSG:   version = 3.2.2
STARTUP_MSG:   classpath = /opt/hadoop-3.2.2/etc/hadoop:/opt/hadoop-3.2.2/share/hadoop/common/lib/curator-recipes-2.13.0.jar:/opt/hadoop-3.2.2/share/hadoop/common/lib/commons-io-2.5.jar:/opt/hadoop-3.2.2/share/hadoop/common/lib/httpclient-4.5.13.jar:/opt/hadoop-3.2.2/share/hadoop/common/lib/commons-net-3.6.jar:/opt/hadoop-3.2.2/share/hadoop/common/lib/jsp-api-2.1.jar:/opt/hadoop-3.2.2/share/hadoop/common/lib/commons-cli-1.2.jar:/opt/hadoop-3.2.2/share/hadoop/common/lib/kerb-admin-1.0.1.jar:/opt/hadoop-3.2.2/share/hadoop/common/lib/jackson-core-2.9.10.jar:/opt/hadoop-3.2.2/share/hadoop/common/lib/kerb-server-1.0.1.jar:/opt/hadoop-3.2.2/share/hadoop/common/lib/jackson-xc-1.9.13.jar:/opt/hadoop-3.2.2/

### Start and test hadoop

#### Start hadoop

In [None]:
# 11.0 Start namenode with start-dfs.sh.
#      If this fails, run ssh_install() (a commented-out call sits in a cell
#      further down) and then run this cell again.

print("\n---25. Start namenode----\n")
! start-dfs.sh

#### Start yarn

In [20]:
# 11.1 Start yarn
! start-yarn.sh

Starting resourcemanager
Starting nodemanagers


In [None]:
# ssh_install()

If `start-dfs.sh` fails, issue the following three commands, one after another:<br>  
`! sudo apt-get remove openssh-client openssh-server`<br>
`! sudo apt-get install openssh-client openssh-server`<br>
`! service ssh restart`<br>

And then try to start hadoop again, as: `start-dfs.sh`

#### Test hadoop

In [None]:
# 11.1
print("\n---26. Make folders in hadoop----\n")
! hdfs dfs -mkdir /user
! hdfs dfs -mkdir /user/ashok

In [None]:
# 11.2 Run hadoop commands
! hdfs dfs -ls /
! hdfs dfs -ls /user

In [None]:
# 11.3 Stopping hadoop
#      Gives some errors
#      But hadoop stops
#!stop-dfs.sh

If hadoop fails to start with `start-dfs.sh`, run the three ssh commands listed above (or call `ssh_install()`), then try again.

In [None]:
################# I am done #############

In [None]:
!pip install pyspark
!pip install -U -q PyDrive

## Install spark

### Define functions

In [24]:
def spark_install():
  print("\n--1. Install findspark----\n")
  !pip install -q findspark

  print("\n--2. Download Apache tar.gz----\n")
  ! wget -c https://mirrors.estointernet.in/apache/spark/spark-3.1.1/spark-3.1.1-bin-hadoop3.2.tgz

  print("\n--3. Transfer downloaded content and unzip tar.gz----\n")
  !  mv /content/spark*   /opt/
  ! tar -xzvf /opt/spark-3.1.1-bin-hadoop3.2.tgz  --directory /opt/

  print("\n--4. Check folder for files----\n")
  ! ls -la /opt


In [27]:
# Function to set up environmental variables for spark
def set_spark_env():
  """Set the Java and Spark environment variables for the notebook process.

  JAVA_HOME, JRE_HOME and SPARK_HOME are overwritten. LD_LIBRARY_PATH and
  PATH are extended with the Spark directories; a variable that is not set
  yet is created, and a directory that is already listed is not added
  again. The resulting PATH and LD_LIBRARY_PATH are printed as a check.
  Takes no arguments and returns nothing.
  """
  print("\n---5. Set Environment variables----\n")
  os.environ["JAVA_HOME"] = "/usr/lib/jvm/java-8-openjdk-amd64"
  os.environ["JRE_HOME"] = "/usr/lib/jvm/java-8-openjdk-amd64/jre"
  os.environ["SPARK_HOME"] = "/opt/spark-3.1.1-bin-hadoop3.2"

  def _append_paths(var, new_entries):
    # Append each entry to the ':'-separated variable `var` unless it is
    # already listed. `os.environ.get` avoids the KeyError that `+=` raises
    # when the variable does not exist. The existing value is kept verbatim.
    value = os.environ.get(var, "")
    listed = value.split(":")
    for entry in new_entries:
      if entry not in listed:
        value = f"{value}:{entry}" if value else entry
        listed.append(entry)
    os.environ[var] = value

  _append_paths("LD_LIBRARY_PATH", ["/opt/spark-3.1.1-bin-hadoop3.2/lib/native"])
  _append_paths("PATH", ["/opt/spark-3.1.1-bin-hadoop3.2/bin",
                         "/opt/spark-3.1.1-bin-hadoop3.2/sbin"])
  # Check
  print(os.environ["PATH"])
  print(os.environ["LD_LIBRARY_PATH"])

In [36]:
def spark_conf():
  print("\n---6. Configure spark to access hadoop----\n")
  !mv /opt/spark-3.1.1-bin-hadoop3.2/conf/spark-env.sh.template  /opt/spark-3.1.1-bin-hadoop3.2/conf/spark-env.sh
  !echo "HADOOP_CONF_DIR=/opt/hadoop-3.2.2/etc/hadoop/" >> /opt/spark-3.1.1-bin-hadoop3.2/conf/spark-env.sh
  # Check
  !cat /opt/spark-3.1.1-bin-hadoop3.2/conf/spark-env.sh

### Install spark

In [37]:
def install_spark():
  """Run the three Spark setup steps in order: download/unpack, environment
  variables, then the spark-env.sh configuration."""
  for step in (spark_install, set_spark_env, spark_conf):
    step()
  

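### Test
No cell shown here calls `install_spark()`; run it before the cells below if Spark should come from the `/opt` install configured above.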

In [43]:
# findspark.init() runs before pyspark is imported.
# NOTE(review): presumably it makes the Spark install importable — confirm
# against the findspark documentation.
import findspark
findspark.init()
from pyspark.sql import SparkSession
# Get (or create) the session; the master is local[*].
spark = SparkSession.builder.master("local[*]").getOrCreate()

In [44]:
from pyspark.ml.feature import VectorAssembler
from pyspark.ml.regression import LinearRegression

In [45]:
import pandas as pd

In [46]:
# Eight-row pandas frame with string, integer and float columns, used as
# input for the Spark smoke test.
pdf = pd.DataFrame(
    dict(
        x1=list("aabbbcdd"),
        x2=["apple", "orange", "orange", "orange",
            "peach", "peach", "apple", "orange"],
        x3=[1, 1, 2, 2, 2, 4, 1, 2],
        x4=[2.4, 2.5, 3.5, 1.4, 2.1, 1.5, 3.0, 2.0],
        y1=[1, 0, 1, 0, 0, 1, 1, 0],
        y2=["yes", "no", "no", "yes", "yes", "yes", "no", "yes"],
    )
)


In [47]:
df = spark.createDataFrame(pdf)

In [None]:
df.show()

In [55]:
# Build a small test file /content/airports.csv: a header line (a,b,c,d)
# followed by seven rows of digits. The first echo uses '>' and so replaces
# the file; the others append. Finally list /content to confirm it exists.
!echo "a,b,c,d"   > /content/airports.csv
!echo "5,4,6,7"   >> /content/airports.csv
!echo "2,3,4,5"   >> /content/airports.csv
!echo "8,9,0,1"   >> /content/airports.csv
!echo "2,3,4,1"   >> /content/airports.csv
!echo "1,2,2,1"   >> /content/airports.csv
!echo "0,1,2,6"   >> /content/airports.csv
!echo "9,3,1,8"   >> /content/airports.csv
!ls -la /content

total 20
drwxr-xr-x 1 root root 4096 Mar 30 00:52 .
drwxr-xr-x 1 root root 4096 Mar 30 00:13 ..
-rw-r--r-- 1 root root   64 Mar 30 00:55 airports.csv
drwxr-xr-x 4 root root 4096 Mar 18 13:36 .config
drwxr-xr-x 1 root root 4096 Mar 18 13:36 sample_data


In [56]:
# Replace the copy in HDFS: delete any earlier /user/ashok/airports.csv,
# upload the local file, then list the folder to confirm the upload.
!hdfs dfs -rm -f /user/ashok/airports.csv
! hdfs dfs -put /content/airports.csv  /user/ashok/
! hdfs dfs -ls /user/ashok

Deleted /user/ashok/airports.csv
Found 1 items
-rw-r--r--   1 root supergroup         64 2021-03-30 00:56 /user/ashok/airports.csv


In [57]:
# Read the CSV back from HDFS. The explicit hdfs:// URI is the fs.defaultFS
# value written to core-site.xml, so the read targets HDFS even when Spark
# did not pick up the hadoop configuration directory.
airports_df = spark.read.csv(
    "hdfs://localhost:9000/user/ashok/airports.csv",
    inferSchema=True,
    header=True,
)

In [None]:
airports_df.show()

In [59]:
!pip install koalas

Collecting koalas
[?25l  Downloading https://files.pythonhosted.org/packages/40/de/87c016a3e5055251ed117c86eb3b0de2381518c7acae54e115711ff30ceb/koalas-1.7.0-py3-none-any.whl (1.4MB)
[K     |████████████████████████████████| 1.4MB 6.4MB/s 
Installing collected packages: koalas
Successfully installed koalas-1.7.0


In [60]:
import databricks.koalas as ks



In [61]:
# Six-row Koalas frame with an explicit integer index 10, 20, ..., 60.
kdf = ks.DataFrame(
    dict(
        a=[1, 2, 3, 4, 5, 6],
        b=[100, 200, 300, 400, 500, 600],
        c="one two three four five six".split(),
    ),
    index=list(range(10, 70, 10)),
)

In [62]:
kdf

Unnamed: 0,a,b,c
10,1,100,one
20,2,200,two
30,3,300,three
40,4,400,four
50,5,500,five
60,6,600,six


In [63]:
# Three-row pandas frame; columns y and z hold the same values.
pdf = pd.DataFrame(
    dict(
        x=range(3),
        y=list("abb"),
        z=list("abb"),
    )
)

In [64]:
# Create a Koalas DataFrame from pandas DataFrame
# Note: this rebinds `df`, which earlier in the notebook held the result of
# spark.createDataFrame(pdf).
df = ks.from_pandas(pdf)

In [65]:
# Rename the columns
df.columns = ['x', 'y', 'z1']

In [66]:
# Do some operations in place:
df['x2'] = df.x * df.x

In [67]:
df

Unnamed: 0,x,y,z1,x2
0,0,a,a,0
1,1,b,b,1
2,2,b,b,4
