<a href="https://colab.research.google.com/github/hrwatts/C-Projects/blob/main/Simple_Linear_Regression_C.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Linear Regression in C++ #

Linear regression is a fundamental tool of statistical analysis. This notebook implements simple (one-variable) least-squares regression — the line of best fit through a scatter plot — in C++, then compiles and runs it in Google Colab.

In [495]:
%%writefile linreg.h
#pragma once
// Fits a straight line y = beta0 + beta1 * x to the points (x[i], y[i]) and
// returns the fitted value beta0 + beta1 * x[i] for every element of x, in
// the same order as x.
//
// NOTE: this header uses std::vector but does not include <vector> itself;
// include <vector> before including this header.
std::vector<double> linreg(std::vector<double> const& x, std::vector<double> const& y);


Overwriting linreg.h


In [496]:
%%writefile mean.h
#pragma once
// Returns the arithmetic mean of the elements of v, or 0 when v is empty.
//
// NOTE: this header uses std::vector but does not include <vector> itself;
// include <vector> before including this header.
double mean(std::vector<double> const& v);

Overwriting mean.h


In [497]:
%%writefile mean.cpp
#include <vector>
#include <numeric>
#include <iostream>
/* (c) Harrison Watts (C++11) for Google Colab
*/

/**
 * Arithmetic mean of the elements of v.
 *
 * @param v  Values to average.
 * @return   Sum of the elements divided by their count; 0 when v is empty
 *           (avoids a division by zero).
 */
double mean(std::vector<double> const& v){
    if(v.empty()){
        return 0.0;
    }

    // The 0.0 seed makes std::accumulate sum in double. Summing into a float
    // would round every partial sum to single precision and throw away most
    // of the accuracy carried by the double inputs.
    double const total = std::accumulate(v.begin(), v.end(), 0.0);
    return total / static_cast<double>(v.size());
}

Overwriting mean.cpp


In [498]:
%%writefile linreg.cpp
#include <iostream>
#include <numeric>
#include <stdexcept>
#include <vector>
#include "mean.h"
/* (c) 2022 Harrison Watts (C++11) for Google Colab
*/

std::vector<double> linreg(std::vector<double> const& x, std::vector<double> const& y){    

    
    auto const count = static_cast<double>(x.size());

    // mean values
    auto const xhat = mean(x);
    auto const yhat = mean(y);

    // sum of squares SSxx
    std::vector<double> SSxxVector (count);
    float SSxx = 0;
    for (int i = 0; i < count; i++) {
        SSxxVector[i] = (x[i] - xhat)*(x[i] - xhat);
        SSxx = SSxx + SSxxVector[i];
    }

    // sum of deviations SSxy
    std::vector<double> SSxyVector (count);
    float SSxy = 0;
    for (int i = 0; i < count; i++) {
        SSxyVector[i] = (x[i] - xhat)*(y[i] - yhat);
        SSxy = SSxy + SSxyVector[i];
    }

    // Linear Regression Parameters
    std::vector<double> beta (2);
    beta[1] = SSxy / SSxx;
    beta[0] = yhat - beta[1]*xhat;

    // Regression Fit
    std::vector<double> fit (count);
    for (int i = 0; i < count; i++) {
        fit[i] = beta[0]+beta[1]*x[i];
    }

    return fit;
};


Overwriting linreg.cpp


In [499]:
%%writefile main.cpp
#include <vector>
#include <numeric>
#include <iostream>
#include "mean.h"
#include "linreg.h"

// Demo driver: prints a small sample data set, fits a line to it with
// linreg(), and prints the fitted value for each sample.
int main(){
    std::vector<double> const xs{0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
    std::vector<double> const ys{1, 3, 2, 5, 7, 8, 8, 9, 10, 12};

    // Table of the raw observations, one (x, y) pair per row
    std::cout << " X    y  " << "\n";
    for (std::vector<double>::size_type row = 0; row < xs.size(); ++row) {
        std::cout << "  " << xs[row] << "  " << ys[row] << "\n";
    }

    // Fitted values, one per line
    auto const fitted = linreg(xs, ys);
    std::cout << "\n Fitted Line" << "\n";
    for (double const value : fitted) {
        std::cout << value << "\n";
    }

    return 0;
}

Overwriting main.cpp


In [500]:
%%script bash

g++ main.cpp mean.cpp linreg.cpp -std=c++1z -o test

In [501]:
!./test

 X    y  
  0  1
  1  3
  2  2
  3  5
  4  7
  5  8
  6  8
  7  9
  8  10
  9  12

 Fitted Line
1.23636
2.40606
3.57576
4.74545
5.91515
7.08485
8.25455
9.42424
10.5939
11.7636
