Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

pendigits analog of test.c #12

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
200 changes: 200 additions & 0 deletions test-pendig.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,200 @@
#include "Tinn.h"
#include <stdio.h>
#include <time.h>
#include <string.h>
#include <stdlib.h>

// A loaded data set: one input row and one target row per data line.
// Field order matters: ndata() fills this struct with a positional initializer.
typedef struct
{
// in[row] holds `nips` input values for that row.
float** in;
// tg[row] holds `nops` target values for that row.
float** tg;
// Number of input values per row.
int nips;
// Number of output (target) values per row.
int nops;
// Number of rows allocated in `in` and `tg`.
int rows;
}
Data;

// One network output slot, bundled so that sorting by prediction keeps the
// output index and its target value together.
typedef struct {
// Output index (filled with the loop index in main).
int k;
// Target value for this output.
float tg;
// Predicted value for this output.
float pd;
} pos;

// Prints a shell `ln -s` command that links the file "<j>-<realnum>.svg"
// into the "good" directory when `goodbad` is non-zero, else into "bad".
// `data` and `pcage` are part of the signature but are not used here.
void output_svg(int j, Data data, int realnum, float pcage, int goodbad)
{
    (void) data;
    (void) pcage;
    const char* dir = "bad";
    if(goodbad)
        dir = "good";
    printf("ln -s %05d-%d.svg %s/\n", j, realnum, dir);
}

// Counts the lines in `file`, reading from the current position to EOF.
// A final line without a trailing newline still counts as a line.
// The stream is rewound to the start before returning.
static int lns(FILE* const file)
{
    int count = 0;
    // Starts as '\n' so that an empty stream yields zero lines.
    int last = '\n';
    for(int c = getc(file); c != EOF; c = getc(file))
    {
        count += (c == '\n');
        last = c;
    }
    // Account for an unterminated last line.
    if(last != '\n')
        count++;
    rewind(file);
    return count;
}

// Reads one line from `file` (up to, and consuming, the next '\n' or EOF)
// into a freshly allocated, NUL-terminated buffer without the newline.
// The caller owns the returned buffer and must free() it.
// Exits the program with an error message if memory cannot be allocated.
static char* readln(FILE* const file)
{
    int ch = EOF;
    int reads = 0;
    int size = 128;
    char* line = malloc((size_t) size);
    if(line == NULL)
    {
        fprintf(stderr, "readln: out of memory\n");
        exit(1);
    }
    while((ch = getc(file)) != '\n' && ch != EOF)
    {
        line[reads++] = (char) ch;
        // Grow while there is still room for the terminating NUL.
        if(reads + 1 == size)
        {
            size *= 2;
            // Keep the old pointer until realloc is known to have succeeded.
            char* const grown = realloc(line, (size_t) size);
            if(grown == NULL)
            {
                free(line);
                fprintf(stderr, "readln: out of memory\n");
                exit(1);
            }
            line = grown;
        }
    }
    line[reads] = '\0';
    return line;
}

// Allocates a `rows` x `cols` table of floats as an array of row pointers.
// The contents of each row are left uninitialized.
// Each row and the row array itself must be released with free().
// Exits the program with an error message if an allocation fails.
static float** new2d(const int rows, const int cols)
{
    float** row = malloc((size_t) rows * sizeof *row);
    // malloc(0) may legitimately return NULL, so only a non-empty request
    // that comes back NULL is treated as a failure.
    if(row == NULL && rows > 0)
    {
        fprintf(stderr, "new2d: out of memory\n");
        exit(1);
    }
    for(int r = 0; r < rows; r++)
    {
        row[r] = malloc((size_t) cols * sizeof *row[r]);
        if(row[r] == NULL && cols > 0)
        {
            fprintf(stderr, "new2d: out of memory\n");
            exit(1);
        }
    }
    return row;
}

static Data ndata(const int nips, const int nops, const int rows)
{
const Data data = {
new2d(rows, nips), new2d(rows, nops), nips, nops, rows
};
return data;
}

// Takes the next token (delimited by ',' or ' ') from the strtok() stream and
// converts it to a float. Pass the line on the first call and NULL afterwards.
// Exits with an error message when the token is missing or not a number.
static float parse_number(char* const from, const int row)
{
    const char* const tok = strtok(from, ", ");
    char* end = NULL;
    const float val = tok ? (float) strtod(tok, &end) : 0.0f;
    if(tok == NULL || end == tok)
    {
        fprintf(stderr, "parse: missing or non-numeric value in row %d\n", row);
        exit(1);
    }
    return val;
}

// Parses one data line into row `row` of `data`.
// The line holds `data.nips` input values followed by one class label.
// Inputs are divided by 100; the label becomes a one-hot target vector of
// length `data.nops`. `line` is modified in place by strtok().
// Exits with an error message on a short line, a non-numeric field, or a
// label outside [0, data.nops).
static void parse(const Data data, char* line, const int row)
{
    // Only the first strtok() call receives the line itself.
    char* next = line;
    for(int col = 0; col < data.nips; col++)
    {
        const float val = parse_number(next, row);
        next = NULL;
        data.in[row][col] = val / 100.0;
    }
    const float val = parse_number(next, row);
    // Written as a positive range test so that NaN is rejected too.
    if(!(val >= 0.0f && val < (float) data.nops))
    {
        fprintf(stderr, "parse: label out of range in row %d\n", row);
        exit(1);
    }
    for(int col = 0; col < data.nops; col++) {
        data.tg[row][col] = 0.0;
    }
    data.tg[row][(int)val] = 1.0;
}

// Releases every input and target row of `d`, then the two row arrays.
static void dfree(const Data d)
{
    int r = 0;
    while(r < d.rows)
    {
        free(d.in[r]);
        free(d.tg[r]);
        r++;
    }
    free(d.in);
    free(d.tg);
}

// Randomly permutes the rows of `d` in place with a Fisher-Yates shuffle.
// Input and target row pointers are swapped together so that each input
// stays paired with its target. Uses rand(); seed it with srand() first.
// Data sets with fewer than two rows are left untouched, which also avoids
// a modulo by zero on an empty set.
static void shuffle(const Data d)
{
    for(int a = d.rows - 1; a > 0; a--)
    {
        // Pick a partner from the not-yet-fixed prefix [0, a].
        const int b = rand() % (a + 1);
        float* ot = d.tg[a];
        float* it = d.in[a];
        // Swap output.
        d.tg[a] = d.tg[b];
        d.tg[b] = ot;
        // Swap input.
        d.in[a] = d.in[b];
        d.in[b] = it;
    }
}

// Loads the data file at `path` into a new Data set with `nips` inputs and
// `nops` outputs per row; one row is created per line of the file.
// If the file cannot be opened, prints a hint on where to download the
// pendigits test set and exits with status 1.
// The caller releases the result with dfree().
static Data build(const char* path, const int nips, const int nops)
{
    FILE* file = fopen(path, "r");
    if(file == NULL)
    {
        printf("Could not open %s\n", path);
        printf("Get it from the machine learning database: ");
        // This program reads the pendigits test file, so point at that one.
        printf("wget http://archive.ics.uci.edu/ml/machine-learning-databases/pendigits/pendigits.tes\n");
        exit(1);
    }
    const int rows = lns(file);
    Data data = ndata(nips, nops, rows);
    for(int row = 0; row < rows; row++)
    {
        char* line = readln(file);
        parse(data, line, row);
        free(line);
    }
    fclose(file);
    return data;
}

// qsort() comparator for `pos` elements: orders by `pd` descending, so the
// largest prediction ends up first. Elements with equal `pd` compare as 0.
int sort_by_pd(const void *a, const void *b) {
    const pos* const lhs = (const pos*) a;
    const pos* const rhs = (const pos*) b;
    // Evaluates to -1 when lhs is larger, +1 when smaller, 0 otherwise.
    return (lhs->pd < rhs->pd) - (lhs->pd > rhs->pd);
}

// Loads the pendigits test set and a saved network, runs a prediction for
// every row, prints each miss, and finally reports how many were correct.
int main()
{
    // Tinn does not seed the random number generator.
    srand(time(0));
    // Input and output size is hard coded here as machine learning
    // repositories usually don't include the input and output size in the data itself.
    const int nips = 16;
    const int nops = 10;
    // Load the test set.
    const Data data = build("pendigits.tes", nips, nops);
    // This is how you load the neural network from disk.
    const Tinn loaded = xtload("saved.tinn");
    pos check[nops];
    int correct = 0;

    // Now we do a prediction with the neural network we loaded from disk.
    for (int j = 0; j < data.rows; j++) {
        const float* const in = data.in[j];
        const float* const tg = data.tg[j];
        const float* const pd = xtpredict(loaded, in);
        // To find the "best match", we need to sort by probability (`pd`)
        // whilst keeping the target (`tg`) aligned. Copying them into
        // our struct and then `qsort`ing on `pd` satisfies this.
        for(int i = 0; i < data.nops; i++) {
            check[i].k = i;
            check[i].tg = tg[i];
            check[i].pd = pd[i];
        }
        qsort(check, data.nops, sizeof(pos), sort_by_pd);
        // If the highest probability guess is the correct one, success.
        if (check[0].tg == 1) {
            correct++;
        }
        // Otherwise we print out our best guess and the correct answer.
        else {
            printf("%05d %d %.5f | ", j, check[0].k, (double) check[0].pd);
            // Slot 0 is already known to be wrong, so search from rank 1.
            for (int i=1; i < data.nops; i++) {
                if (check[i].tg == 1) {
                    printf("%d %.5f", check[i].k, (double) check[i].pd);
                }
            }
            printf("\n");
        }
    }
    // Summary line.
    printf("%d correct out of %d rows\n", correct, data.rows);
    // All done. Let's clean up.
    xtfree(loaded);
    dfree(data);
    return 0;
}
188 changes: 188 additions & 0 deletions train-pendig.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
#include "Tinn.h"
#include <stdio.h>
#include <time.h>
#include <string.h>
#include <stdlib.h>

// A loaded data set: one input row and one target row per data line.
// Field order matters: ndata() fills this struct with a positional initializer.
typedef struct
{
// in[row] holds `nips` input values for that row.
float** in;
// tg[row] holds `nops` target values for that row.
float** tg;
// Number of input values per row.
int nips;
// Number of output (target) values per row.
int nops;
// Number of rows allocated in `in` and `tg`.
int rows;
}
Data;

// Returns the number of lines between the current position of `file` and
// EOF. An unterminated final line is counted. The stream is rewound to the
// beginning afterwards.
static int lns(FILE* const file)
{
    int total = 0;
    // Treat "nothing read yet" like a newline so an empty stream gives 0.
    int prev = '\n';
    for(;;)
    {
        const int cur = getc(file);
        if(cur == EOF)
            break;
        if(cur == '\n')
            total++;
        prev = cur;
    }
    // The last line may lack a trailing newline; count it anyway.
    total += (prev != '\n');
    rewind(file);
    return total;
}

// Reads one line from `file` (up to, and consuming, the next '\n' or EOF)
// into a freshly allocated, NUL-terminated buffer without the newline.
// The caller owns the returned buffer and must free() it.
// Exits the program with an error message if memory cannot be allocated.
static char* readln(FILE* const file)
{
    int ch = EOF;
    int reads = 0;
    int size = 128;
    char* line = malloc((size_t) size);
    if(line == NULL)
    {
        fprintf(stderr, "readln: out of memory\n");
        exit(1);
    }
    while((ch = getc(file)) != '\n' && ch != EOF)
    {
        line[reads++] = (char) ch;
        // Grow while there is still room for the terminating NUL.
        if(reads + 1 == size)
        {
            size *= 2;
            // Keep the old pointer until realloc is known to have succeeded.
            char* const grown = realloc(line, (size_t) size);
            if(grown == NULL)
            {
                free(line);
                fprintf(stderr, "readln: out of memory\n");
                exit(1);
            }
            line = grown;
        }
    }
    line[reads] = '\0';
    return line;
}

// Allocates a `rows` x `cols` table of floats as an array of row pointers.
// The contents of each row are left uninitialized.
// Each row and the row array itself must be released with free().
// Exits the program with an error message if an allocation fails.
static float** new2d(const int rows, const int cols)
{
    float** row = malloc((size_t) rows * sizeof *row);
    // malloc(0) may legitimately return NULL, so only a non-empty request
    // that comes back NULL is treated as a failure.
    if(row == NULL && rows > 0)
    {
        fprintf(stderr, "new2d: out of memory\n");
        exit(1);
    }
    for(int r = 0; r < rows; r++)
    {
        row[r] = malloc((size_t) cols * sizeof *row[r]);
        if(row[r] == NULL && cols > 0)
        {
            fprintf(stderr, "new2d: out of memory\n");
            exit(1);
        }
    }
    return row;
}

static Data ndata(const int nips, const int nops, const int rows)
{
const Data data = {
new2d(rows, nips), new2d(rows, nops), nips, nops, rows
};
return data;
}

// Takes the next token (delimited by ',' or ' ') from the strtok() stream and
// converts it to a float. Pass the line on the first call and NULL afterwards.
// Exits with an error message when the token is missing or not a number.
static float parse_number(char* const from, const int row)
{
    const char* const tok = strtok(from, ", ");
    char* end = NULL;
    const float val = tok ? (float) strtod(tok, &end) : 0.0f;
    if(tok == NULL || end == tok)
    {
        fprintf(stderr, "parse: missing or non-numeric value in row %d\n", row);
        exit(1);
    }
    return val;
}

// Parses one data line into row `row` of `data`.
// The line holds `data.nips` input values followed by one class label.
// `line` is modified in place by strtok().
// Exits with an error message on a short line, a non-numeric field, or a
// label outside [0, data.nops).
static void parse(const Data data, char* line, const int row)
{
    // Only the first strtok() call receives the line itself.
    char* next = line;
    for(int col = 0; col < data.nips; col++)
    {
        const float val = parse_number(next, row);
        next = NULL;
        /* Input values are 0-100 pixel coordinates; scale to 0.0-1.0 */
        data.in[row][col] = val / 100.0;
    }
    /* Last value is a 0-9 numeral which we need to convert
     * into a size 10 vector of {0.00, 1.00}
     */
    const float val = parse_number(next, row);
    // Written as a positive range test so that NaN is rejected too.
    if(!(val >= 0.0f && val < (float) data.nops))
    {
        fprintf(stderr, "parse: label out of range in row %d\n", row);
        exit(1);
    }
    for(int col = 0; col < data.nops; col++) {
        data.tg[row][col] = 0.0;
    }
    data.tg[row][(int)val] = 1.0;
}

// Frees all memory owned by `d`: each input row, each target row, and then
// the two arrays of row pointers.
static void dfree(const Data d)
{
    float** const inputs = d.in;
    float** const targets = d.tg;
    const int count = d.rows;
    for(int i = 0; i != count && i < count; ++i)
    {
        free(inputs[i]);
        free(targets[i]);
    }
    free(inputs);
    free(targets);
}

// Randomly permutes the rows of `d` in place with a Fisher-Yates shuffle.
// Input and target row pointers are swapped together so that each input
// stays paired with its target. Uses rand(); seed it with srand() first.
// Data sets with fewer than two rows are left untouched, which also avoids
// a modulo by zero on an empty set.
static void shuffle(const Data d)
{
    for(int a = d.rows - 1; a > 0; a--)
    {
        // Pick a partner from the not-yet-fixed prefix [0, a].
        const int b = rand() % (a + 1);
        float* ot = d.tg[a];
        float* it = d.in[a];
        // Swap output.
        d.tg[a] = d.tg[b];
        d.tg[b] = ot;
        // Swap input.
        d.in[a] = d.in[b];
        d.in[b] = it;
    }
}

// Reads the data file at `path` into a new Data set with `nips` inputs and
// `nops` outputs per row, one row per line of the file.
// If the file cannot be opened, prints a download hint and exits with
// status 1. The caller releases the result with dfree().
static Data build(const char* path, const int nips, const int nops)
{
    FILE* const fp = fopen(path, "r");
    if(!fp)
    {
        printf("Could not open %s\n", path);
        printf("Get it from the machine learning database: ");
        printf("wget http://archive.ics.uci.edu/ml/machine-learning-databases/pendigits/pendigits.tra\n");
        exit(1);
    }
    // Count the lines first so the tables can be sized up front.
    const int total = lns(fp);
    Data loaded = ndata(nips, nops, total);
    int row = 0;
    while(row < total)
    {
        char* const text = readln(fp);
        parse(loaded, text, row);
        free(text);
        row++;
    }
    fclose(fp);
    return loaded;
}

// Trains a network on the pendigits training set, saves it to "saved.tinn",
// reloads it, and prints the target and prediction for one random row.
// Returns 1 without training if the training file contains no rows.
int main()
{
    // Tinn does not seed the random number generator.
    srand(time(0));
    // Input and output size is hard coded here as machine learning
    // repositories usually don't include the input and output size in the data itself.
    const int nips = 16;
    const int nops = 10;
    // Hyper Parameters.
    // Learning rate is annealed and thus not constant.
    // It can be fine tuned along with `nhid`, the hidden size passed to xtbuild.
    // Feel free to modify the anneal rate.
    // The number of iterations can be changed for stronger training.
    float rate = 1.0f;
    const int nhid = 28;
    const float anneal = 0.99f;
    const int iterations = 128;
    // Load the training set.
    const Data data = build("pendigits.tra", nips, nops);
    // An empty file would make `rand() % data.rows` below divide by zero.
    if(data.rows <= 0)
    {
        fprintf(stderr, "pendigits.tra contains no rows\n");
        dfree(data);
        return 1;
    }
    // Train, baby, train.
    const Tinn tinn = xtbuild(nips, nhid, nops);
    for(int i = 0; i < iterations; i++)
    {
        shuffle(data);
        float error = 0.0f;
        for(int j = 0; j < data.rows; j++)
        {
            const float* const in = data.in[j];
            const float* const tg = data.tg[j];
            error += xttrain(tinn, in, tg, rate);
        }
        printf("error %.12f :: learning rate %f\n",
            (double) error / data.rows,
            (double) rate);
        rate *= anneal;
    }
    // This is how you save the neural network to disk.
    xtsave(tinn, "saved.tinn");
    xtfree(tinn);
    // This is how you load the neural network from disk.
    const Tinn loaded = xtload("saved.tinn");
    // Now we do a prediction with the neural network we loaded from disk.
    // Ideally, we would also load a testing set to make the prediction with,
    // but for the sake of brevity here we just reuse the training set from earlier.
    // One data set is picked at random.
    const int pick = rand() % data.rows;
    const float* const in = data.in[pick];
    const float* const tg = data.tg[pick];
    const float* const pd = xtpredict(loaded, in);
    xtprint(tg, data.nops);
    xtprint(pd, data.nops);
    // All done. Let's clean up.
    xtfree(loaded);
    dfree(data);
    return 0;
}