diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..0b7839b --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,51 @@ +{ + "files.associations": { + "array": "cpp", + "atomic": "cpp", + "bit": "cpp", + "*.tcc": "cpp", + "cctype": "cpp", + "clocale": "cpp", + "cmath": "cpp", + "compare": "cpp", + "concepts": "cpp", + "cstdarg": "cpp", + "cstddef": "cpp", + "cstdint": "cpp", + "cstdio": "cpp", + "cstdlib": "cpp", + "cwchar": "cpp", + "cwctype": "cpp", + "deque": "cpp", + "string": "cpp", + "unordered_map": "cpp", + "vector": "cpp", + "exception": "cpp", + "algorithm": "cpp", + "functional": "cpp", + "iterator": "cpp", + "memory": "cpp", + "memory_resource": "cpp", + "numeric": "cpp", + "optional": "cpp", + "random": "cpp", + "string_view": "cpp", + "system_error": "cpp", + "tuple": "cpp", + "type_traits": "cpp", + "utility": "cpp", + "fstream": "cpp", + "initializer_list": "cpp", + "iosfwd": "cpp", + "iostream": "cpp", + "istream": "cpp", + "limits": "cpp", + "new": "cpp", + "numbers": "cpp", + "ostream": "cpp", + "sstream": "cpp", + "stdexcept": "cpp", + "streambuf": "cpp", + "typeinfo": "cpp" + } +} \ No newline at end of file diff --git a/.vscode/tasks.json b/.vscode/tasks.json new file mode 100644 index 0000000..05054c5 --- /dev/null +++ b/.vscode/tasks.json @@ -0,0 +1,28 @@ +{ + "tasks": [ + { + "type": "cppbuild", + "label": "C/C++: g++ build active file", + "command": "/usr/bin/g++", + "args": [ + "-fdiagnostics-color=always", + "-g", + "${file}", + "-o", + "${fileDirname}/${fileBasenameNoExtension}" + ], + "options": { + "cwd": "${fileDirname}" + }, + "problemMatcher": [ + "$gcc" + ], + "group": { + "kind": "build", + "isDefault": true + }, + "detail": "Task generated by Debugger." 
+ } + ], + "version": "2.0.0" +} \ No newline at end of file diff --git a/MMformat.ps b/MMformat.ps new file mode 100755 index 0000000..860a2bc Binary files /dev/null and b/MMformat.ps differ diff --git a/README.md b/README.md index a1ec567..c1d7997 100644 --- a/README.md +++ b/README.md @@ -1 +1,47 @@ -# cpp +# Matrix Computation Toolkit for Matrix Market format +This repository contains a toolkit for working with the Matrix Market format. + +## Testing + +The repository contains +```bash +mat_mat.cpp +mat_vec.cpp +mmf_product.cpp +``` + +_mat_vec.cpp_ contains the code for a dense matrix-vector product. +_mat_mat.cpp_ contains the code for a dense matrix-matrix product. +_mmf_product.cpp_ contains the master code for the aforementioned operations. + +You can either read the individual source files or try the sample cases by double-clicking the executables, which have the same filenames as the cpp files without any extension. + +Please note that executing the _mmf_product_ file requires you to specify the computation type: + + +0. Matrix-vector product +1. Matrix-matrix product + +Neither a CMake build nor the linkage between the cpp files has been implemented yet. + +## Using your own input file +You can test the code with your own input files by editing the paths for the two matrices (or the matrix and the vector): +```bash +matA = "MMformat.ps"; +matB = "MMformat.ps"; +``` + +or uncomment the prompts, +```bash +std::cout<<"First Matrix filename is: "; +std::cin >> matA; +std::cout << "Second Matrix filename is : "; +std::cin >> matB; +``` +and compile _mmf_product.cpp_ by typing: +```bash +g++ mmf_product.cpp -o mmf_product +``` +The above line will update the executable file. Once you run the executable you will see the prompts where you can type the input paths. The results will be stored in the _result.mm_ file. 
+ + diff --git a/assignment0.md b/assignment0.md new file mode 100644 index 0000000..d1120e0 --- /dev/null +++ b/assignment0.md @@ -0,0 +1,24 @@ +## Assignment 0 +#(a) +I tested my code by creating an executable and running it from the command prompt. The matrices used for testing are synthetically generated. + +#(b) +The Matrix Market format is widely supported for sparse matrices. However, its limitations are that it is not efficient and has limited data types. It does not scale well to larger datasets. + +#(c) +Right now, my code reads the size of the matrix at run time, when the matrix is read in. However, if I already knew the size of the matrices, I would use static arrays, which are more efficient. + +#(d) +The matrix is stored in row-major order. This is the default for C++ applications. If I used column-major order, it would negatively impact performance because the CPU can cache rows more efficiently. + +#(e) +I used in-line comments to describe the functionality of the code. + +#(f) +The code may not handle all possible edge cases well. For example, when the data does not follow the MMTX format and possibly has strings in the data sequence, the proposed algorithm would not be able to handle the information. + +#(g) +The current implementation of the code has different functions defined for vector and matrix reading. In the future, this could be improved by reading the user input and automatically switching between vector and matrix within a single function. Moreover, the functions are pasted into one single cpp file, which could be improved by using hpp (header) files and including them from another .cpp file for better readability. In addition, there are limitations from the format, where dense matrices cause higher memory consumption. + +#(h) +The approach I would take to optimize the code would be using headers. 
diff --git a/mat_mat b/mat_mat new file mode 100755 index 0000000..c9f5d30 Binary files /dev/null and b/mat_mat differ diff --git a/mat_mat.cpp b/mat_mat.cpp new file mode 100644 index 0000000..5062f0b --- /dev/null +++ b/mat_mat.cpp @@ -0,0 +1,48 @@ +#include +#include + +/* Dense matrix-matrix product: returns mat1 * mat2. Throws std::invalid_argument when the column count of mat1 differs from the row count of mat2. Column counts are taken from row 0 only. NOTE(review): mat1[0] and mat2[0] are read before any emptiness check, so empty inputs are not handled. */ std::vector> mat_mat(const std::vector>&mat1,const std::vector>&mat2) { + size_t mat1row = mat1.size(); + size_t mat1col = mat1[0].size(); + size_t mat2row = mat2.size(); + size_t mat2col = mat2[0].size(); + + if (mat1col != mat2row){ + throw std::invalid_argument("Matrix dimensions do not match. Abort"); + } + + std::vector> result(mat1row,std::vector(mat2col,0.0)); + + for (size_t i = 0; i < mat1row; i++){ + for (size_t j = 0; j < mat2col; j++){ + for (size_t k = 0; k < mat1col; k++){ + result[i][j] += mat1[i][k] * mat2[k][j]; + } + } + } + + return result; +} + +/* Prints the matrix to std::cout, one row per line, each value followed by a space. */ void printmat(const std::vector> & mat){ + for (const std::vector & row : mat){ + for (double val: row){ + std::cout << val << " "; + + } + std::cout << std::endl; + } +} + + +/* Demo driver: multiplies two hard-coded 3x3 matrices and prints the product. */ int main() { + std::vector> matrix1 = {{1.0,2.0,3.0}, {1.0,2.0,3.0},{1.0,2.0,3.0}}; + std::vector> matrix2 = {{1.0,2.0,3.0}, {1.0,2.0,3.0},{1.0,2.0,3.0}}; + //std::vector vector = {1.0,2.0}; + + std::vector> result = mat_mat(matrix1,matrix2); + std::cout << "The product is: "<< std::endl; + printmat(result); + return 0; +} + diff --git a/mat_vec b/mat_vec new file mode 100755 index 0000000..601c967 Binary files /dev/null and b/mat_vec differ diff --git a/mat_vec.cpp b/mat_vec.cpp new file mode 100644 index 0000000..2517f42 --- /dev/null +++ b/mat_vec.cpp @@ -0,0 +1,35 @@ +#include +#include + +/* Dense matrix-vector product: result[i] accumulates mat[i][j] * vec[j] over j. Throws std::invalid_argument when the length of row 0 differs from vec.size(). NOTE(review): mat[0] is read unconditionally, so an empty matrix is not handled. */ std::vector mat_vec(const std::vector>& mat, const std::vector& vec) { + + size_t matcol = mat[0].size(); + size_t vecsize = vec.size(); + + if(matcol != vecsize) { + throw std::invalid_argument("Matrix and vector dimensions do not match."); + } + + std::vector result(mat.size(), 0.0); + + for(size_t i = 0; i < mat.size(); i++) { + for(size_t j = 0; j < matcol; j++) { + 
result[i] += mat[i][j] * vec[j]; + } + } + + return result; +} + +/* Demo driver: multiplies a hard-coded 3x3 matrix by a hard-coded 3-element vector and prints the result. */ int main() { + std::vector> matrix = {{1.0,2.0,3.0}, {1.0,2.0,3.0},{1.0,2.0,3.0}}; + std::vector vector = {1.3,2.0,3.0}; + //std::vector vector = {1.0,2.0}; + + std::vector result = mat_vec(matrix,vector); + std::cout << "The product is: "; + for (double val:result) { + std::cout< +#include +#include +#include +#include + +/* NOTE(review): the end of mat_vec.cpp and the diff header of the next file are missing from this copy of the patch; the code from here on is presumably mmf_product.cpp - confirm against the repository. readMMformat_vector: reads a vector from a Matrix Market style text file. Parses a size line of three integers (rows, cols, entry count), then reads that many lines of row, col, value and stores each value at index col-1 of a vector of length n_col. NOTE(review): after the %% loop one more std::getline runs unconditionally, so the first line that does not start with %% is always discarded before the size line is parsed. NOTE(review): the second loop compares a two-character prefix with the one-character string %, which only matches a line that is exactly %. NOTE(review): neither the open nor the reads are checked, and col is not range-checked. */ std::vector readMMformat_vector(const std::string & filename){ + std::ifstream file(filename); + + std::string line; + + std::getline(file, line); // + while (line.substr(0,2) == "%%") { + // if it is a header + std::getline(file,line); // skip to the next line + } + + std::getline(file, line); + while (line.substr(0,2) == "%") { + // if it is a comment + std::getline(file, line); // skip to the next line + } + + std::stringstream ss(line); + + size_t n_row, n_col, entry; + + ss>>n_row>> n_col>> entry; + + std::vector result(std::vector(n_col, 0.0)); //initialize the result matrix + + for (size_t i = 0; i < entry; i++){ + std::getline(file, line); + std::stringstream ss(line); + size_t row, col; + double val; + ss >> row >> col>> val; + result[col-1] = val; + } + file.close(); + return result; +} + + + +/* Dense matrix-matrix product; the text matches the mat_mat routine in mat_mat.cpp. Throws std::invalid_argument when the column count of mat1 differs from the row count of mat2. NOTE(review): row 0 of each input is read before any emptiness check. */ std::vector> mat_mat(const std::vector>&mat1,const std::vector>&mat2) { + size_t mat1row = mat1.size(); + size_t mat1col = mat1[0].size(); + size_t mat2row = mat2.size(); + size_t mat2col = mat2[0].size(); + + if (mat1col != mat2row){ + throw std::invalid_argument("Matrix dimensions do not match. 
Abort"); + } + + std::vector> result(mat1row,std::vector(mat2col,0.0)); + + for (size_t i = 0; i < mat1row; i++){ + for (size_t j = 0; j < mat2col; j++){ + for (size_t k = 0; k < mat1col; k++){ + result[i][j] += mat1[i][k] * mat2[k][j]; + } + } + } + + return result; +} + +std::vector mat_vec(const std::vector>& mat, const std::vector& vec) { + + size_t matcol = mat[0].size(); + size_t vecsize = vec.size(); + + if(matcol != vecsize) { + throw std::invalid_argument("Matrix and vector dimensions do not match."); + } + + std::vector result(mat.size(), 0.0); + + for(size_t i = 0; i < mat.size(); i++) { + for(size_t j = 0; j < matcol; j++) { + result[i] += mat[i][j] * vec[j]; + } + } + + return result; +} + +void saveMMfile(const std::string & filename, const std::vector> & matrix){ + std::ofstream file(filename); + + size_t nrow = matrix.size(); + size_t ncol = matrix[0].size(); + + file << "%%MatrixMarket matrix array real general\n"; + file << nrow << " " << ncol << " "<< nrow << "\n"; + + for (size_t i = 0; i< nrow; i++){ + for (size_t j = 0; j < ncol; j++){ + if (matrix[i][j] != 0.0){ + file << i << " "<< j << " "<< matrix[i][j] << "\n"; + } + + } + } + file.close(); +} + +void saveMMfile_vector(const std::string & filename, const std::vector & matrix){ + std::ofstream file(filename); + + size_t nrow = matrix.size(); + + file << "%%MatrixMarket matrix array real general\n"; + file << nrow << 1 << nrow << "\n"; + + for (size_t i = 0; i< nrow; ++i){ + file << i << 1 << matrix[i] << "\n"; + } + + file.close(); +} + +std::vector> readMMformat(const std::string & filename){ + std::ifstream file(filename); + + + + std::string line; + + std::getline(file, line); + while (line.substr(0,1) == "%") { + // if it is a comment + std::cout << "The comments says: " << line << " \n"; + std::getline(file, line); // skip to the next line + } + + std::stringstream ss(line); + + size_t n_row, n_col, entry; + + ss>>n_row>> n_col>> entry; + + std::vector> result(n_row, 
std::vector(n_col, 0.0)); //initialize the result matrix + + for (size_t i = 0; i < entry; i++){ + std::getline(file, line); + std::stringstream ss(line); + size_t row, col; + double val; + ss >> row >> col>> val; + /* File indices are treated as 1-based, hence the -1. NOTE(review): row and col are not range-checked against n_row and n_col, and the stream is not checked for a failed open or read. */ result[row-1][col-1] = val; + } + + file.close(); + + return result; +} + + +/* Driver: reads matrix A, reads a computation type from std::cin (0 = matrix-vector, 1 = matrix-matrix), reads B in the matching shape, and writes the product to result.mm. Both input paths are hard-coded to MMformat.ps; the interactive filename prompts are commented out. Any other computation type does nothing after A is read. */ int main() { + std::string matA, matB; + int computationType; + /* + std::cout << "First Matrix filename is: "; + std::cin >> matA; + std::cout << "Second Matrix filename is: "; + std::cin >> matB; */ + + matA = "MMformat.ps"; + matB = "MMformat.ps"; + std::cout << "Computation type is: "; + std::cin >> computationType; + + std::vector> matrixA = readMMformat(matA); + + if (computationType == 0){ + std::vector matrixB = readMMformat_vector(matB); + std::vector result; + result= mat_vec(matrixA, matrixB); + /* NOTE(review): saveMMfile_vector streams nrow, 1 and nrow (and each i, 1, value) with no separators, so the fields run together in the output file. */ saveMMfile_vector("result.mm", result); + } + + if (computationType == 1){ + std::vector> matrixB = readMMformat(matB); + std::vector> result; + result = mat_mat(matrixA, matrixB); + + /* NOTE(review): saveMMfile labels the file as array format but writes only the non-zero entries as zero-based i j value triples, with nrow in the entry-count position; readMMformat subtracts 1 from every index, so the written file does not round-trip through it. */ saveMMfile("result.mm", result); + } + return 0; +} diff --git a/result.mm b/result.mm new file mode 100644 index 0000000..d8e0c8e --- /dev/null +++ b/result.mm @@ -0,0 +1,12 @@ +%%MatrixMarket matrix array real general +5 5 5 +0 0 100 +0 4 44.8 +1 1 19.84 +1 2 6.6 +2 1 11 +2 2 15 +3 0 110.25 +3 3 56.25 +3 4 25.2 +4 4 1.44