Skip to content

Excessive memory usage in Go : A concrete example  #647

@gopherbot

Description

@gopherbot

by serge.hulne:

Here is the exact same program written in Python, in D and in Go.

It is used to sort a relatively large text file which is made by
concatenating Shakespeare plays (downloaded from Gutenberg.com)

The program extracts words from said text using a trivial tokenizer (the
same algorithm in all three cases), stores them in a hash map to keep only
one exemplar of each word and increments a counter every time a word which
is already in said map is detected again, hence counting the frequency of
occurrence of words in the text. The pairs (frequency, word) are stored in an
array which is sorted according to frequency in order to display a list of
the words appearing the most frequently in the text.

The problem
-----------

The Go version uses about 90 megabytes of RAM whereas the two other
versions use about 8 megabytes of RAM to do the exact same job.

So, basically Go seems to use up 10 times more RAM than Python (or D) for
that kind of task.

Here are the sources:
---------------------


1) Python:
----------


#!/usr/bin/env python



def read_lines(fname):
    """Print the frequency of every distinct word in a text file.

    The file is scanned character by character. A word is a maximal run of
    characters for which ``str.isspace()`` is false.

    Args:
        fname: Path of the text file to read.

    Prints to stdout:
        One ``(count,word)`` line per distinct word, sorted in descending
        order of ``(count, word)``, followed by ``lines= N`` and
        ``words = N`` with the total number of lines and words read.
    """
    words_count = {}
    l_cnt = 0
    w_cnt = 0
    inword = False
    buf = ""

    # ``open`` works on Python 2 and 3 (``file`` is Python-2-only), and the
    # ``with`` block guarantees the handle is closed.
    with open(fname, "r") as f:
        for l in f:
            l_cnt += 1
            for c in l:
                if not c.isspace():
                    if not inword:
                        # First character of a new word.
                        inword = True
                        w_cnt += 1
                        buf = ""
                    buf += c
                elif inword:
                    # Whitespace terminates the current word. The first
                    # sighting counts as 1, so the stored value is the true
                    # number of occurrences.
                    words_count[buf] = words_count.get(buf, 0) + 1
                    inword = False
                    buf = ""

    # A file whose last line has no trailing newline ends in the middle of a
    # word; record that word too so it is not silently lost.
    if inword:
        words_count[buf] = words_count.get(buf, 0) + 1

    words_array = [(count, word) for word, count in words_count.items()]
    words_array.sort(reverse=True)
    for item in words_array:
        # Single parenthesised argument: valid as a Python 2 print statement
        # and as a Python 3 print() call.
        print("(%s,%s)" % (item[0], item[1]))

    print("lines= %d" % l_cnt)
    print("words = %d" % w_cnt)

# Script entry point: when executed directly (not imported), run the word
# count on shakespeare.txt, looked up two directories above the current
# working directory.
if __name__ == "__main__":
    read_lines("../../shakespeare.txt")



Go language:  
-----------

package main

import (
	"bufio"
	"fmt"
	"io"
	"os"
	"sort"
	"unicode"
)




//---
// int_word_array is a slice of int_word that implements sort.Interface.
// Sorting it orders the entries by descending count (most frequent first).
type int_word_array []int_word

// Len returns the number of entries in s.
func (s int_word_array) Len() int { return len(s) }

// Less reports whether entry i has a strictly higher count than entry j.
// The comparison is deliberately reversed (> rather than <) so that
// sort.Sort yields a descending order.
func (s int_word_array) Less(i, j int) bool { return s[i].cpt > s[j].cpt }

// Swap exchanges entries i and j.
func (s int_word_array) Swap(i, j int) { s[i], s[j] = s[j], s[i] }

// int_word pairs a word with the number of times it was seen.
type int_word struct {
	cpt  int    // number of occurrences of word
	word string // the word itself
}
//---





func main() int {

    words_map := map[string]int{}

    l_cnt := 0
    w_cnt := 0
    cpt_chars := 0
    inword := false
    buf := ""
    
    f, err := os.Open("../shakespeare.txt", os.O_RDONLY, 0666)
    //f, err := os.Open("hamlet.txt", os.O_RDONLY, 0666)

    if err != nil {
        fmt.Printf("\nError => %s\n\n", err)
        os.Exit(1)
    }

    reader := bufio.NewReader(f) //Buffered reader
    
    
    for {
        c, _ ,err := reader.ReadRune() 

        cpt_chars++
        if err != os.EOF && err == nil {
            if c == '\n' {
                l_cnt++
            }

            if unicode.IsSpace(c) == false { 
                if inword == false {
                    buf =  ""
                    buf += string(c)
                    inword = true
                    w_cnt++
                } else {
                    buf += string(c)
                }
            } else if inword == true {
                
            if _, ok := words_map[buf]; ok {
                words_map[buf]++
            } else {
                    words_map[buf] = 1
            }
            
            //fmt.Printf("buf = (%s)\n", buf)
            inword = false
            buf =  ""
            }

            } else { //EOF detected
                if err == os.EOF  {
                    break
            }
        } //end if (err=nil)
    } // end for (main loop)

        
    
    //---
    var words_map_size int = len(words_map)
    var int_words int_word_array
    int_words = make(int_word_array, words_map_size)
    var iw int_word

    int_words_index := 0
    for word, cpt := range words_map {å
        //fmt.Printf("%d =\t\t%s\n", cpt, word)
        iw.cpt = cpt
        iw.word = word
        int_words[int_words_index] = iw
        int_words_index++
    }
    
    sort.Sort(int_words)
    for _, item := range int_words[0:100] {
        fmt.Printf("(%d,%s)\n", item.cpt, item.word)
    }
    //---
    
        
    fmt.Printf("\nlines = %d, words = %d, chars = %d\n", l_cnt, w_cnt, cpt_chars)
    return 0
}

--------------

Serge Hulne

Metadata

Metadata

Assignees

No one assigned

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions