In [4]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

In [5]:
data = pd.read_csv('bigmart_train.csv')
data.head()

Unnamed: 0,Item_Identifier,Item_Weight,Item_Fat_Content,Item_Visibility,Item_Type,Item_MRP,Outlet_Identifier,Outlet_Establishment_Year,Outlet_Size,Outlet_Location_Type,Outlet_Type,Item_Outlet_Sales
0,FDA15,9.3,Low Fat,0.016047,Dairy,249.8092,OUT049,1999,Medium,Tier 1,Supermarket Type1,3735.138
1,DRC01,5.92,Regular,0.019278,Soft Drinks,48.2692,OUT018,2009,Medium,Tier 3,Supermarket Type2,443.4228
2,FDN15,17.5,Low Fat,0.01676,Meat,141.618,OUT049,1999,Medium,Tier 1,Supermarket Type1,2097.27
3,FDX07,19.2,Regular,0.0,Fruits and Vegetables,182.095,OUT010,1998,,Tier 3,Grocery Store,732.38
4,NCD19,8.93,Low Fat,0.0,Household,53.8614,OUT013,1987,High,Tier 3,Supermarket Type1,994.7052


# Finding Null Values

In [6]:
data.isnull().sum()

Item_Identifier                 0
Item_Weight                  1463
Item_Fat_Content                0
Item_Visibility                 0
Item_Type                       0
Item_MRP                        0
Outlet_Identifier               0
Outlet_Establishment_Year       0
Outlet_Size                  2410
Outlet_Location_Type            0
Outlet_Type                     0
Item_Outlet_Sales               0
dtype: int64

In [7]:
data.shape

(8523, 12)

In [8]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8523 entries, 0 to 8522
Data columns (total 12 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   Item_Identifier            8523 non-null   object 
 1   Item_Weight                7060 non-null   float64
 2   Item_Fat_Content           8523 non-null   object 
 3   Item_Visibility            8523 non-null   float64
 4   Item_Type                  8523 non-null   object 
 5   Item_MRP                   8523 non-null   float64
 6   Outlet_Identifier          8523 non-null   object 
 7   Outlet_Establishment_Year  8523 non-null   int64  
 8   Outlet_Size                6113 non-null   object 
 9   Outlet_Location_Type       8523 non-null   object 
 10  Outlet_Type                8523 non-null   object 
 11  Item_Outlet_Sales          8523 non-null   float64
dtypes: float64(4), int64(1), object(7)
memory usage: 799.2+ KB


# Removing The Null Values

In [9]:
data.dropna(axis=0,inplace=True)

In [10]:
data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 4650 entries, 0 to 8522
Data columns (total 12 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   Item_Identifier            4650 non-null   object 
 1   Item_Weight                4650 non-null   float64
 2   Item_Fat_Content           4650 non-null   object 
 3   Item_Visibility            4650 non-null   float64
 4   Item_Type                  4650 non-null   object 
 5   Item_MRP                   4650 non-null   float64
 6   Outlet_Identifier          4650 non-null   object 
 7   Outlet_Establishment_Year  4650 non-null   int64  
 8   Outlet_Size                4650 non-null   object 
 9   Outlet_Location_Type       4650 non-null   object 
 10  Outlet_Type                4650 non-null   object 
 11  Item_Outlet_Sales          4650 non-null   float64
dtypes: float64(4), int64(1), object(7)
memory usage: 472.3+ KB


# Removing unwanted columns

In [11]:
data.drop(['Item_Identifier','Outlet_Identifier'],axis=1,inplace=True)

In [12]:
data['Item_Fat_Content'].replace(to_replace='Low Fat',value='low fat',inplace=True)
data['Item_Fat_Content'].replace(to_replace='LF',value='low fat',inplace=True)
data['Item_Fat_Content'].replace(to_replace='reg',value='Regular',inplace=True)


# Preprocessing

In [13]:
le = LabelEncoder()
data.head()

Unnamed: 0,Item_Weight,Item_Fat_Content,Item_Visibility,Item_Type,Item_MRP,Outlet_Establishment_Year,Outlet_Size,Outlet_Location_Type,Outlet_Type,Item_Outlet_Sales
0,9.3,low fat,0.016047,Dairy,249.8092,1999,Medium,Tier 1,Supermarket Type1,3735.138
1,5.92,Regular,0.019278,Soft Drinks,48.2692,2009,Medium,Tier 3,Supermarket Type2,443.4228
2,17.5,low fat,0.01676,Meat,141.618,1999,Medium,Tier 1,Supermarket Type1,2097.27
4,8.93,low fat,0.0,Household,53.8614,1987,High,Tier 3,Supermarket Type1,994.7052
5,10.395,Regular,0.0,Baking Goods,51.4008,2009,Medium,Tier 3,Supermarket Type2,556.6088


In [14]:
data['Item_Fat_Content'] = le.fit_transform(data['Item_Fat_Content'])
data['Item_Type'] = le.fit_transform(data['Item_Type'])
data['Outlet_Size'] = le.fit_transform(data['Outlet_Size'])
data['Outlet_Location_Type'] = le.fit_transform(data['Outlet_Location_Type'])
data['Outlet_Type'] = le.fit_transform(data['Outlet_Type'])


In [15]:
data.head()

Unnamed: 0,Item_Weight,Item_Fat_Content,Item_Visibility,Item_Type,Item_MRP,Outlet_Establishment_Year,Outlet_Size,Outlet_Location_Type,Outlet_Type,Item_Outlet_Sales
0,9.3,1,0.016047,4,249.8092,1999,1,0,0,3735.138
1,5.92,0,0.019278,14,48.2692,2009,1,2,1,443.4228
2,17.5,1,0.01676,10,141.618,1999,1,0,0,2097.27
4,8.93,1,0.0,9,53.8614,1987,0,2,0,994.7052
5,10.395,0,0.0,0,51.4008,2009,1,2,1,556.6088


In [21]:
x = data.drop(['Item_Outlet_Sales'],axis=1)
y = data['Item_Outlet_Sales']
data.to_csv(path_or_buf='edited.csv')

In [18]:
import numpy as np
a = np.array(y)
y = a.reshape(4650,1)

In [19]:
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.3)

In [20]:
model = LinearRegression()
model.fit(x_train,y_train)
y_pred = model.predict(x_test)


In [73]:
f = r2_score(y_test,y_pred)
f

0.4701963743030467

In [74]:
f = mean_squared_error(y_test,y_pred)
f

1208504.9025532098

In [22]:
f = mean_absolute_error(y_test,y_pred)
f

814.636124438689

In [1]:
from PyQt5 import QtWidgets
import sys
from PyQt5.QtWidgets import QApplication
from PyQt5.QtWidgets import QDialog
from PyQt5.uic import loadUi

x = []

class page1(QDialog):
    def __init__(self):
        super(page1,self).__init__()
        loadUi('page1.ui',self)
        self.next1.clicked.connect(self.p2)
        
    def p2(self):
        x.clear()
        x.append(float(self.Item_Weight.text()))
        widget.setCurrentIndex(widget.currentIndex()+1)
        
class page2(QDialog):
    def __init__(self):
        super(page2,self).__init__()
        loadUi('page2.ui',self)
        self.next2.clicked.connect(self.p3)
        self.back2.clicked.connect(self.b1)
        
    def p3(self):
        x.append(int(self.Item_Fat_Content.text()))
        widget.setCurrentIndex(widget.currentIndex()+1)
        
    def b1(self):
        x.pop()
        widget.setCurrentIndex(widget.currentIndex()-1)
        print(x)
        
class page3(QDialog):
    def __init__(self):
        super(page3,self).__init__()
        loadUi('page3.ui',self)
        self.next3.clicked.connect(self.p4)
        self.back3.clicked.connect(self.b2)
        
    def b2(self):
        x.pop()
        widget.setCurrentIndex(widget.currentIndex()-1)
        print(x)
        
    def p4(self):
        x.append(float(self.Item_Visibility.text()))
        widget.setCurrentIndex(widget.currentIndex()+1)
        
class page4(QDialog):
    def __init__(self):
        super(page4,self).__init__()
        loadUi('page4.ui',self)
        self.next4.clicked.connect(self.p5)
        self.back4.clicked.connect(self.b3)
        
    def p5(self):
        x.append(int(self.Item_Type.text()))
        widget.setCurrentIndex(widget.currentIndex()+1)
        
    def b3(self):
        x.pop()
        widget.setCurrentIndex(widget.currentIndex()-1)
        print(x)
        
class page5(QDialog):
    def __init__(self):
        super(page5,self).__init__()
        loadUi('page5.ui',self)
        self.next5.clicked.connect(self.p6)
        self.back5.clicked.connect(self.b4)
        
    def p6(self):
        x.append(float(self.Item_MRP.text()))
        widget.setCurrentIndex(widget.currentIndex()+1)
        
    def b4(self):
        x.pop()
        widget.setCurrentIndex(widget.currentIndex()-1)
        print(x)
        
class page6(QDialog):
    def __init__(self):
        super(page6,self).__init__()
        loadUi('page6.ui',self)
        self.next6.clicked.connect(self.p7)
        self.back6.clicked.connect(self.b5)
        
    def p7(self):
        x.append(int(self.Outlet_Establishment_Year.text()))
        widget.setCurrentIndex(widget.currentIndex()+1)
        
    def b5(self):
        x.pop()
        widget.setCurrentIndex(widget.currentIndex()-1)
        print(x)
        
class page7(QDialog):
    def __init__(self):
        super(page7,self).__init__()
        loadUi('page7.ui',self)
        self.next7.clicked.connect(self.p8)
        self.back7.clicked.connect(self.b6)
        
    def b6(self):
        x.pop()
        widget.setCurrentIndex(widget.currentIndex()-1)
        print(x)
        
    def p8(self):
        x.append(int(self.Outlet_Size.text()))
        widget.setCurrentIndex(widget.currentIndex()+1)
        
class page8(QDialog):
    def __init__(self):
        super(page8,self).__init__()
        loadUi('page8.ui',self)
        self.next8.clicked.connect(self.p9)
        self.back8.clicked.connect(self.b7)
        
    def b7(self):
        x.pop()
        widget.setCurrentIndex(widget.currentIndex()-1)
        print(x)
        
    def p9(self):
        x.append(int(self.Outlet_Location_Type.text()))
        widget.setCurrentIndex(widget.currentIndex()+1)
        print(x)
        
class page9(QDialog):
    def __init__(self):
        super(page9,self).__init__()
        loadUi('page9.ui',self)
        self.next9.clicked.connect(self.predict)
        self.back9.clicked.connect(self.b8)
        
    def b8(self):
        x.pop()
        widget.setCurrentIndex(widget.currentIndex()-1)
        print(x)
        
    def predict(self):
        import pandas as pd
        from sklearn.linear_model import LinearRegression
        from sklearn.model_selection import train_test_split
        
        global x
        x.append(int(self.Outlet_Type.text()))
        data = pd.read_csv('edited.csv')
        data.drop('Unnamed: 0',axis=1,inplace=True)
        X = data.drop(['Item_Outlet_Sales'],axis=1)
        y = data['Item_Outlet_Sales']
        import numpy as np
        a = np.array(y)
        y = a.reshape(4650,1)
        x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.3)
        model = LinearRegression()
        model.fit(x_train,y_train)
        y_pred = model.predict([x])
        self.result.setText(str(y_pred[0][0]))

        
        

if __name__ == '__main__': 
    app = QApplication(sys.argv)
    widget = QtWidgets.QStackedWidget()
    page1 = page1()
    page2 = page2()
    page3 = page3()
    page4 = page4()
    page5 = page5()
    page6 = page6()
    page7 = page7()
    page8 = page8()
    page9 = page9()
    widget.addWidget(page1)
    widget.addWidget(page2)
    widget.addWidget(page3)
    widget.addWidget(page4)
    widget.addWidget(page5)
    widget.addWidget(page6)
    widget.addWidget(page7)
    widget.addWidget(page8)
    widget.addWidget(page9)
    widget.setFixedHeight(441)
    widget.setFixedWidth(721)
    widget.show()
    sys.exit(app.exec_())

ValueError: invalid literal for int() with base 10: 'o'

[9.3, 1, 0.16, 4, 249.8, 1999, 1, 0]




SystemExit: 0

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [2]:
x

Unnamed: 0,Item_Weight,Item_Fat_Content,Item_Visibility,Item_Type,Item_MRP,Outlet_Establishment_Year,Outlet_Size,Outlet_Location_Type,Outlet_Type
0,9.300,1,0.016047,4,249.8092,1999,1,0,0
1,5.920,0,0.019278,14,48.2692,2009,1,2,1
2,17.500,1,0.016760,10,141.6180,1999,1,0,0
3,8.930,1,0.000000,9,53.8614,1987,0,2,0
4,10.395,0,0.000000,0,51.4008,2009,1,2,1
...,...,...,...,...,...,...,...,...,...
4645,20.750,0,0.083607,5,178.8318,1997,2,0,0
4646,6.865,1,0.056783,13,214.5218,1987,0,2,0
4647,10.600,1,0.035186,8,85.1224,2004,2,1,0
4648,7.210,0,0.145221,13,103.1332,2009,1,2,1


In [22]:
data = pd.read_csv('edited.csv')
data.head()

Unnamed: 0.1,Unnamed: 0,Item_Weight,Item_Fat_Content,Item_Visibility,Item_Type,Item_MRP,Outlet_Establishment_Year,Outlet_Size,Outlet_Location_Type,Outlet_Type,Item_Outlet_Sales
0,0,9.3,1,0.016047,4,249.8092,1999,1,0,0,3735.138
1,1,5.92,0,0.019278,14,48.2692,2009,1,2,1,443.4228
2,2,17.5,1,0.01676,10,141.618,1999,1,0,0,2097.27
3,4,8.93,1,0.0,9,53.8614,1987,0,2,0,994.7052
4,5,10.395,0,0.0,0,51.4008,2009,1,2,1,556.6088
