# Wine

### Introduction:

This exercise is an adaptation of the UCI Wine dataset.
The only purpose is to practice deleting data with pandas.

### Step 1. Import the necessary libraries

In [2]:
import pandas as pd


### Step 2. Import the dataset from this [address](https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data).

### Step 3. Assign it to a variable called wine

In [3]:
# Raw UCI wine measurements; the remote file carries no header row.
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data"

# header=None keeps the first record as data instead of using it for column
# names, so the columns come back with integer labels.
wine = pd.read_csv(url, header=None)

# Preview the first five records.
print(wine.head(5))


   0      1     2     3     4    5     6     7     8     9     10    11    12  \
0   1  14.23  1.71  2.43  15.6  127  2.80  3.06  0.28  2.29  5.64  1.04  3.92   
1   1  13.20  1.78  2.14  11.2  100  2.65  2.76  0.26  1.28  4.38  1.05  3.40   
2   1  13.16  2.36  2.67  18.6  101  2.80  3.24  0.30  2.81  5.68  1.03  3.17   
3   1  14.37  1.95  2.50  16.8  113  3.85  3.49  0.24  2.18  7.80  0.86  3.45   
4   1  13.24  2.59  2.87  21.0  118  2.80  2.69  0.39  1.82  4.32  1.04  2.93   

     13  
0  1065  
1  1050  
2  1185  
3  1480  
4   735  


### Step 4. Delete the first, fourth, seventh, ninth, eleventh, thirteenth and fourteenth columns

In [4]:
# Zero-based labels of the 1st, 4th, 7th, 9th, 11th, 13th and 14th columns.
columns_to_delete = [0, 3, 6, 8, 10, 12, 13]

# Drop along the column axis and keep the result under the same name.
wine = wine.drop(labels=columns_to_delete, axis="columns")

print(wine.head())


      1     2     4    5     7     9     11
0  14.23  1.71  15.6  127  3.06  2.29  1.04
1  13.20  1.78  11.2  100  2.76  1.28  1.05
2  13.16  2.36  18.6  101  3.24  2.81  1.03
3  14.37  1.95  16.8  113  3.49  2.18  0.86
4  13.24  2.59  21.0  118  2.69  1.82  1.04


### Step 5. Assign the columns as below:

The attributes are (donated by Riccardo Leardi, riclea '@' anchem.unige.it):  
1) alcohol  
2) malic_acid  
3) alcalinity_of_ash  
4) magnesium  
5) flavanoids  
6) proanthocyanins  
7) hue

In [5]:
# New names, listed in the same left-to-right order as the seven remaining
# columns.
wine.columns = [
    'alcohol',
    'malic_acid',
    'alcalinity_of_ash',
    'magnesium',
    'flavanoids',
    'proanthocyanins',
    'hue',
]

print(wine.head())


   alcohol  malic_acid  alcalinity_of_ash  magnesium  flavanoids  \
0    14.23        1.71               15.6        127        3.06   
1    13.20        1.78               11.2        100        2.76   
2    13.16        2.36               18.6        101        3.24   
3    14.37        1.95               16.8        113        3.49   
4    13.24        2.59               21.0        118        2.69   

   proanthocyanins   hue  
0             2.29  1.04  
1             1.28  1.05  
2             2.81  1.03  
3             2.18  0.86  
4             1.82  1.04  


### Step 6. Set the values of the first 3 rows of alcohol to NaN

In [6]:
# .loc slices by label and includes the end label, so ":2" covers the rows
# labelled 0, 1 and 2 (the first three rows).
wine.loc[:2, 'alcohol'] = float('nan')

print(wine.head())


   alcohol  malic_acid  alcalinity_of_ash  magnesium  flavanoids  \
0      NaN        1.71               15.6        127        3.06   
1      NaN        1.78               11.2        100        2.76   
2      NaN        2.36               18.6        101        3.24   
3    14.37        1.95               16.8        113        3.49   
4    13.24        2.59               21.0        118        2.69   

   proanthocyanins   hue  
0             2.29  1.04  
1             1.28  1.05  
2             2.81  1.03  
3             2.18  0.86  
4             1.82  1.04  


### Step 7. Now set the values of rows 3 and 4 of magnesium to NaN

In [7]:
# magnesium is loaded as an integer column, and integers cannot hold NaN.
# Convert it to float explicitly first: relying on pandas to upcast the column
# silently during assignment is deprecated (PDEP-6).
wine['magnesium'] = wine['magnesium'].astype('float64')

# .loc slices by label and includes both ends: labels 2 and 3 are the third
# and fourth rows.
wine.loc[2:3, 'magnesium'] = float('nan')

print(wine.head())


   alcohol  malic_acid  alcalinity_of_ash  magnesium  flavanoids  \
0      NaN        1.71               15.6      127.0        3.06   
1      NaN        1.78               11.2      100.0        2.76   
2      NaN        2.36               18.6        NaN        3.24   
3    14.37        1.95               16.8        NaN        3.49   
4    13.24        2.59               21.0      118.0        2.69   

   proanthocyanins   hue  
0             2.29  1.04  
1             1.28  1.05  
2             2.81  1.03  
3             2.18  0.86  
4             1.82  1.04  


### Step 8. Fill the NaN values with the number 10 in alcohol and 100 in magnesium

In [8]:
# Fill each column with its own replacement value in one call on the frame.
# Calling fillna(..., inplace=True) on wine['col'] acts on an intermediate
# Series (chained assignment): recent pandas warns about it, and with
# Copy-on-Write enabled the frame itself is left unchanged.
wine = wine.fillna({'alcohol': 10, 'magnesium': 100})

print(wine.head())


   alcohol  malic_acid  alcalinity_of_ash  magnesium  flavanoids  \
0    10.00        1.71               15.6      127.0        3.06   
1    10.00        1.78               11.2      100.0        2.76   
2    10.00        2.36               18.6      100.0        3.24   
3    14.37        1.95               16.8      100.0        3.49   
4    13.24        2.59               21.0      118.0        2.69   

   proanthocyanins   hue  
0             2.29  1.04  
1             1.28  1.05  
2             2.81  1.03  
3             2.18  0.86  
4             1.82  1.04  


### Step 9. Count the number of missing values

In [9]:
# Boolean mask of missing cells, summed down each column: one count per column.
missing_values_count = wine.isnull().sum(axis=0)

print(missing_values_count)


alcohol              0
malic_acid           0
alcalinity_of_ash    0
magnesium            0
flavanoids           0
proanthocyanins      0
hue                  0
dtype: int64


### Step 10. Create an array of 10 random integers from 0 up to (but not including) 10

In [11]:
import numpy as np

# Ten integers drawn from [0, 10): given a single bound, randint uses it as
# the exclusive upper limit and starts from 0.
random_numbers = np.random.randint(10, size=10)

print(random_numbers)


[3 7 2 1 3 8 2 7 6 5]


### Step 11. Use the random numbers you generated as row indices and assign NaN to every cell of those rows.

In [13]:
# Treat the random integers as row positions and blank out every column of
# those rows; a repeated position simply hits the same row again.
wine.iloc[random_numbers, :] = np.nan

print(wine.head())


   alcohol  malic_acid  alcalinity_of_ash  magnesium  flavanoids  \
0    10.00        1.71               15.6      127.0        3.06   
1      NaN         NaN                NaN        NaN         NaN   
2      NaN         NaN                NaN        NaN         NaN   
3      NaN         NaN                NaN        NaN         NaN   
4    13.24        2.59               21.0      118.0        2.69   

   proanthocyanins   hue  
0             2.29  1.04  
1              NaN   NaN  
2              NaN   NaN  
3              NaN   NaN  
4             1.82  1.04  


### Step 12.  How many missing values do we have?

In [15]:
# Count missing cells over the whole frame in one pass (all rows and columns).
missing_values_count = wine.isna().to_numpy().sum()

print("Total number of missing values:", missing_values_count)


Total number of missing values: 49


### Step 13. Delete the rows that contain missing values

In [16]:
# Keep only the rows that have no missing value in any column.
# The result is reassigned instead of using inplace=True, matching the
# reassignment style of the column drop in Step 4 and avoiding in-place
# mutation of the frame.
wine = wine.dropna()

print(wine.head())


    alcohol  malic_acid  alcalinity_of_ash  magnesium  flavanoids  \
0     10.00        1.71               15.6      127.0        3.06   
4     13.24        2.59               21.0      118.0        2.69   
9     13.86        1.35               16.0       98.0        3.15   
10    14.10        2.16               18.0      105.0        3.32   
11    14.12        1.48               16.8       95.0        2.43   

    proanthocyanins   hue  
0              2.29  1.04  
4              1.82  1.04  
9              1.85  1.01  
10             2.38  1.25  
11             1.57  1.17  


### Step 14. Print only the non-null values in alcohol

In [17]:
# Select the alcohol values through a not-null mask rather than dropping nulls.
print(wine.loc[wine['alcohol'].notna(), 'alcohol'])


0      10.00
4      13.24
9      13.86
10     14.10
11     14.12
       ...  
173    13.71
174    13.40
175    13.27
176    13.17
177    14.13
Name: alcohol, Length: 171, dtype: float64


### Step 15.  Reset the index, so it starts with 0 again

In [18]:
# Renumber the rows 0..n-1; drop=True discards the old labels instead of
# keeping them as a new column. The result is reassigned instead of using
# inplace=True, matching the reassignment style of the column drop in Step 4.
wine = wine.reset_index(drop=True)

print(wine.head())


   alcohol  malic_acid  alcalinity_of_ash  magnesium  flavanoids  \
0    10.00        1.71               15.6      127.0        3.06   
1    13.24        2.59               21.0      118.0        2.69   
2    13.86        1.35               16.0       98.0        3.15   
3    14.10        2.16               18.0      105.0        3.32   
4    14.12        1.48               16.8       95.0        2.43   

   proanthocyanins   hue  
0             2.29  1.04  
1             1.82  1.04  
2             1.85  1.01  
3             2.38  1.25  
4             1.57  1.17  


### BONUS: Create your own question and answer it.