# Wine

### Introduction:

This exercise is a adaptation from the UCI Wine dataset.
The only pupose is to practice deleting data with pandas.

### Step 1. Import the necessary libraries

In [1]:
import pandas as pd
import numpy as np

### Step 2. Import the dataset from this [address](https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data). 

### Step 3. Assign it to a variable called wine

In [2]:
data = 'https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data'

wine_df = pd.read_csv(data)

wine_df.head()

Unnamed: 0,1,14.23,1.71,2.43,15.6,127,2.8,3.06,.28,2.29,5.64,1.04,3.92,1065
0,1,13.2,1.78,2.14,11.2,100,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050
1,1,13.16,2.36,2.67,18.6,101,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185
2,1,14.37,1.95,2.5,16.8,113,3.85,3.49,0.24,2.18,7.8,0.86,3.45,1480
3,1,13.24,2.59,2.87,21.0,118,2.8,2.69,0.39,1.82,4.32,1.04,2.93,735
4,1,14.2,1.76,2.45,15.2,112,3.27,3.39,0.34,1.97,6.75,1.05,2.85,1450


### Step 4. Delete the first, fourth, seventh, nineth, eleventh, thirteenth and fourteenth columns

In [3]:
# del wine_df.iloc[:, [0, 3, 6, 8, 10, 12, 13]]

# wine_df[:, [0, 3, 6, 8, 10, 12, 13]]

# Should be using the drop method instead of del

wine_df.drop(wine_df.columns[[0, 3, 6, 8, 10, 12, 13]], axis=1, inplace=True)

# wine_df.columns[[0, 3, 6, 8, 10, 12, 13]]

In [4]:
len(wine_df.columns)

wine_df.head()

Unnamed: 0,14.23,1.71,15.6,127,3.06,2.29,1.04
0,13.2,1.78,11.2,100,2.76,1.28,1.05
1,13.16,2.36,18.6,101,3.24,2.81,1.03
2,14.37,1.95,16.8,113,3.49,2.18,0.86
3,13.24,2.59,21.0,118,2.69,1.82,1.04
4,14.2,1.76,15.2,112,3.39,1.97,1.05


### Step 5. Assign the columns as below:

The attributes are (dontated by Riccardo Leardi, riclea '@' anchem.unige.it):  
1) alcohol  
2) malic_acid  
3) alcalinity_of_ash  
4) magnesium  
5) flavanoids  
6) proanthocyanins  
7) hue 

In [5]:
wine_df.columns = ['alc', 'malic', 'alcalinity', 'magne', 'flava', 'proanth', 'hue']

wine_df.head()

Unnamed: 0,alc,malic,alcalinity,magne,flava,proanth,hue
0,13.2,1.78,11.2,100,2.76,1.28,1.05
1,13.16,2.36,18.6,101,3.24,2.81,1.03
2,14.37,1.95,16.8,113,3.49,2.18,0.86
3,13.24,2.59,21.0,118,2.69,1.82,1.04
4,14.2,1.76,15.2,112,3.39,1.97,1.05


### Step 6. Set the values of the first 3 rows from alcohol as NaN

In [6]:
wine_df.loc[:2, 'alc'] = np.nan

### Step 7. Now set the value of the rows 3 and 4 of magnesium as NaN

In [7]:
wine_df.loc[2:3, 'magne'] = np.nan

### Step 8. Fill the value of NaN with the number 10 in alcohol and 100 in magnesium

In [8]:
wine_df.alc.fillna(10, inplace=True)
wine_df.magne.fillna(100, inplace=True)

wine_df.head()

Unnamed: 0,alc,malic,alcalinity,magne,flava,proanth,hue
0,10.0,1.78,11.2,100.0,2.76,1.28,1.05
1,10.0,2.36,18.6,101.0,3.24,2.81,1.03
2,10.0,1.95,16.8,100.0,3.49,2.18,0.86
3,13.24,2.59,21.0,100.0,2.69,1.82,1.04
4,14.2,1.76,15.2,112.0,3.39,1.97,1.05


### Step 9. Count the number of missing values

In [9]:
wine_df.isnull().sum()

alc           0
malic         0
alcalinity    0
magne         0
flava         0
proanth       0
hue           0
dtype: int64

### Step 10.  Create an array of 10 random numbers up until 10

In [10]:
arr = np.random.randint(0, high=10, size=10)

arr

array([4, 1, 8, 4, 6, 5, 8, 9, 6, 5])

### Step 11.  Set the rows of the random numbers in the column

In [15]:
wine_df.alc[arr]

wine_df.alc[arr] = 'Set'

wine_df.head()

wine_df.alc[arr] = np.nan

wine_df.head()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  import sys


Unnamed: 0,alc,malic,alcalinity,magne,flava,proanth,hue
0,10.0,1.78,11.2,100.0,2.76,1.28,1.05
1,,2.36,18.6,101.0,3.24,2.81,1.03
2,10.0,1.95,16.8,100.0,3.49,2.18,0.86
3,13.24,2.59,21.0,100.0,2.69,1.82,1.04
4,,1.76,15.2,112.0,3.39,1.97,1.05


In [22]:
# change to view vs copy

wine_df.loc[:, 'alc']

# Basically sounds like I should be using the pandas native access methods

wine_df.iloc[arr]

# Sick!

Unnamed: 0,alc,malic,alcalinity,magne,flava,proanth,hue
4,,1.76,15.2,112.0,3.39,1.97,1.05
1,,2.36,18.6,101.0,3.24,2.81,1.03
8,,1.35,16.0,98.0,3.15,1.85,1.01
4,,1.76,15.2,112.0,3.39,1.97,1.05
6,,2.15,17.6,121.0,2.51,1.25,1.06
5,,1.87,14.6,96.0,2.52,1.98,1.02
8,,1.35,16.0,98.0,3.15,1.85,1.01
9,,2.16,18.0,105.0,3.32,2.38,1.25
6,,2.15,17.6,121.0,2.51,1.25,1.06
5,,1.87,14.6,96.0,2.52,1.98,1.02


### Step 12.  How many missing values do we have?

In [23]:
wine_df.isnull().sum()

alc           6
malic         0
alcalinity    0
magne         0
flava         0
proanth       0
hue           0
dtype: int64

### Step 14. Print only the non-null values in alcohol

In [27]:
wine_df.alc[wine_df.alc.notnull()]

0         10
2         10
3      13.24
7      14.83
10     14.12
11     13.75
12     14.75
13     14.38
14     13.63
15      14.3
16     13.83
17     14.19
18     13.64
19     14.06
20     12.93
21     13.71
22     12.85
23      13.5
24     13.05
25     13.39
26      13.3
27     13.87
28     14.02
29     13.73
30     13.58
31     13.68
32     13.76
33     13.51
34     13.48
35     13.28
       ...  
147    13.32
148    13.08
149     13.5
150    12.79
151    13.11
152    13.23
153    12.58
154    13.17
155    13.84
156    12.45
157    14.34
158    13.48
159    12.36
160    13.69
161    12.85
162    12.96
163    13.78
164    13.73
165    13.45
166    12.82
167    13.58
168     13.4
169     12.2
170    12.77
171    14.16
172    13.71
173     13.4
174    13.27
175    13.17
176    14.13
Name: alc, Length: 171, dtype: object

### Step 13. Delete the rows that contain missing values

In [28]:
wine_df.dropna(inplace=True)

wine_df.isnull().sum()

alc           0
malic         0
alcalinity    0
magne         0
flava         0
proanth       0
hue           0
dtype: int64

### Step 15.  Reset the index, so it starts with 0 again

In [31]:
wine_df.head()

wine_df.reset_index(drop=True, inplace=True)

wine_df[:10]

Unnamed: 0,alc,malic,alcalinity,magne,flava,proanth,hue
0,10.0,1.78,11.2,100.0,2.76,1.28,1.05
1,10.0,1.95,16.8,100.0,3.49,2.18,0.86
2,13.24,2.59,21.0,100.0,2.69,1.82,1.04
3,14.83,1.64,14.0,97.0,2.98,1.98,1.08
4,14.12,1.48,16.8,95.0,2.43,1.57,1.17
5,13.75,1.73,16.0,89.0,2.76,1.81,1.15
6,14.75,1.73,11.4,91.0,3.69,2.81,1.25
7,14.38,1.87,12.0,102.0,3.64,2.96,1.2
8,13.63,1.81,17.2,112.0,2.91,1.46,1.28
9,14.3,1.92,20.0,120.0,3.14,1.97,1.07


### BONUS: Create your own question and answer it.