In [1]:
import pandas as pd

## Read CSV data from a URL into a pandas DataFrame

In [6]:
# Location of the raw Iris data file (comma-separated, no header line)
url = (
    "https://archive.ics.uci.edu/ml/machine-learning-databases/"
    "iris/iris.data"
)

In [3]:
# Default behaviour spelled out: header="infer" takes the first line as column labels
df = pd.read_csv(url, header="infer")

In [7]:
# Preview the first five rows; note that the first record ended up as the header
df.head(n=5)

Unnamed: 0,5.1,3.5,1.4,0.2,Iris-setosa
0,4.9,3.0,1.4,0.2,Iris-setosa
1,4.7,3.2,1.3,0.2,Iris-setosa
2,4.6,3.1,1.5,0.2,Iris-setosa
3,5.0,3.6,1.4,0.2,Iris-setosa
4,5.4,3.9,1.7,0.4,Iris-setosa


### By default, the first row is treated as the column header. We can deal with the column headers in multiple ways

### 1. a) Set the header parameter as None

In [8]:
# header=None tells pandas the file has no header line, so every line is data
df = pd.read_csv(filepath_or_buffer=url, header=None)

In [9]:
# Columns are now labelled 0-4 and the first record stays in the data
df.head(n=5)

Unnamed: 0,0,1,2,3,4
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


### b) Provide custom column names as a list in the names parameter

In [12]:
# Experiment: deliberately supply only three labels for a file with five fields
df = pd.read_csv(
    url,
    header=None,
    names=['Sepal_Len', 'Sepal_Wid', 'Petal_Len'],
)

In [13]:
# The three labels land on the last three fields; the first two fields become the row index
df.head(n=5)

Unnamed: 0,Unnamed: 1,Sepal_Len,Sepal_Wid,Petal_Len
5.1,3.5,1.4,0.2,Iris-setosa
4.9,3.0,1.4,0.2,Iris-setosa
4.7,3.2,1.3,0.2,Iris-setosa
4.6,3.1,1.5,0.2,Iris-setosa
5.0,3.6,1.4,0.2,Iris-setosa


In [17]:
# Inspect the row index: with only three names given, the two leading
# unnamed fields are combined into a MultiIndex
df.index

MultiIndex([(5.1, 3.5),
            (4.9, 3.0),
            (4.7, 3.2),
            (4.6, 3.1),
            (5.0, 3.6),
            (5.4, 3.9),
            (4.6, 3.4),
            (5.0, 3.4),
            (4.4, 2.9),
            (4.9, 3.1),
            ...
            (6.7, 3.1),
            (6.9, 3.1),
            (5.8, 2.7),
            (6.8, 3.2),
            (6.7, 3.3),
            (6.7, 3.0),
            (6.3, 2.5),
            (6.5, 3.0),
            (6.2, 3.4),
            (5.9, 3.0)],
           length=150)

### Here, I experimented by intentionally giving an insufficient number of column names in the names parameter. The names were assigned to the last columns of the data, and the leading columns that were left without a name were automatically used as index columns (hence the MultiIndex above).

### What happens if we provide more column names than the number of columns in the data?

In [31]:
# Experiment: supply six labels for a file with five fields ('Extra' has no data behind it)
df = pd.read_csv(
    url,
    header=None,
    names=['Sepal_Len', 'Sepal_Wid', 'Petal_len', 'Petal_Wid', 'Class', 'Extra'],
)

In [32]:
# The surplus 'Extra' column is present but holds no values
df.head(n=5)

Unnamed: 0,Sepal_Len,Sepal_Wid,Petal_len,Petal_Wid,Class,Extra
0,5.1,3.5,1.4,0.2,Iris-setosa,
1,4.9,3.0,1.4,0.2,Iris-setosa,
2,4.7,3.2,1.3,0.2,Iris-setosa,
3,4.6,3.1,1.5,0.2,Iris-setosa,
4,5.0,3.6,1.4,0.2,Iris-setosa,


### An additional column filled with NaN values gets added to the actual data

In [24]:
# One label per field this time, so names and data line up exactly
df = pd.read_csv(
    url,
    header=None,
    names=['Sepal_Len', 'Sepal_Wid', 'Petal_len', 'Petal_Wid', 'Class'],
)

In [25]:
# All five fields now carry a descriptive label and the default 0..n row index is back
df.head(n=5)

Unnamed: 0,Sepal_Len,Sepal_Wid,Petal_len,Petal_Wid,Class
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


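### 2. If we are not sure about the column names, we can provide a prefix for the column names using the prefix parameter. Note: `prefix` was deprecated in pandas 1.4 and removed in pandas 2.0; on newer versions, read with `header=None` and then call `DataFrame.add_prefix` to get the same column names.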

In [20]:
# read_csv's `prefix=` argument was deprecated in pandas 1.4 and removed in 2.0,
# so passing it raises a TypeError on current versions. Reading with header=None
# yields integer labels 0..4; add_prefix then turns them into Column0..Column4,
# which is the same result and works on old and new pandas alike.
df = pd.read_csv(url, header=None).add_prefix("Column")

In [21]:
# Column labels should read Column0 ... Column4
df.head(n=5)

Unnamed: 0,Column0,Column1,Column2,Column3,Column4
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


### 3. We can also set a column from the data as the index column. However, our data does not have an index column, so we can create the DataFrame first and then set the name (header) of its index.

In [28]:
# Reload the frame with the descriptive column names so this section does not
# depend on which earlier cell last assigned `df`: read top to bottom, the cell
# just above leaves `df` with the Column0..Column4 labels, whereas the result
# shown for this section uses the Sepal_*/Petal_* names.
df = pd.read_csv(
    url,
    header=None,
    names=['Sepal_Len', 'Sepal_Wid', 'Petal_len', 'Petal_Wid', 'Class'],
)

# The default row index has no name. print() is required here because a bare
# None as the cell's last expression would display nothing.
print(df.index.name)

None


In [29]:
# Label the (so far unnamed) row index as 'Index'
df.index = df.index.rename('Index')

In [30]:
# The index label 'Index' is now shown above the row numbers
df.head(n=5)

Unnamed: 0_level_0,Sepal_Len,Sepal_Wid,Petal_len,Petal_Wid,Class
Index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


## Reference:
### 1. https://medium.com/@andikarachman/pandas-tutorial-importing-csv-files-347b1f5ae256
### 2. https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_csv.html